From 6eb638318a8bea65bcf5d7b48a33f744e795e333 Mon Sep 17 00:00:00 2001 From: Erik Joelsson Date: Tue, 7 May 2019 12:32:19 -0700 Subject: [PATCH 1/7] 8223319: Add copyright footer to specs and man pages Reviewed-by: iris, sspitsyn --- make/Docs.gmk | 114 ++++++++++++++++++++++------- make/common/TextFileProcessing.gmk | 8 +- src/hotspot/share/prims/jvmti.xml | 10 +-- src/hotspot/share/prims/jvmti.xsl | 71 ++++++++---------- 4 files changed, 123 insertions(+), 80 deletions(-) diff --git a/make/Docs.gmk b/make/Docs.gmk index 93e20d94461..3668a602aa4 100644 --- a/make/Docs.gmk +++ b/make/Docs.gmk @@ -32,6 +32,7 @@ include ModuleTools.gmk include ProcessMarkdown.gmk include ToolsJdk.gmk include ZipArchive.gmk +include TextFileProcessing.gmk # This is needed to properly setup DOCS_MODULES. $(eval $(call ReadImportMetaData)) @@ -64,7 +65,7 @@ MODULES_SOURCE_PATH := $(call PathList, $(call GetModuleSrcPath) \ # URLs JAVADOC_BASE_URL := https://docs.oracle.com/pls/topic/lookup?ctx=javase$(VERSION_NUMBER)&id=homepage BUG_SUBMIT_URL := https://bugreport.java.com/bugreport/ -COPYRIGHT_URL := {@docroot}/../legal/copyright.html +COPYRIGHT_URL := legal/copyright.html LICENSE_URL := https://www.oracle.com/technetwork/java/javase/terms/license/java$(VERSION_NUMBER)speclicense.html REDISTRIBUTION_URL := https://www.oracle.com/technetwork/java/redist-137594.html @@ -148,6 +149,15 @@ else HEADER_STYLE := style="margin-top: 14px;" endif +# $1 - Relative prefix to COPYRIGHT_URL +COPYRIGHT_BOTTOM = \ + Copyright \ + © 1993, $(COPYRIGHT_YEAR), $(FULL_COMPANY_NAME), \ + $(COMPANY_ADDRESS).
All rights reserved. \ + Use is subject to license terms and the \ + documentation redistribution policy. \ + $(DRAFT_MARKER_STR) + JAVADOC_BOTTOM := \ Report a bug or suggest an enhancement
\ For further API reference and developer documentation see the \ @@ -157,12 +167,7 @@ JAVADOC_BOTTOM := \ of terms, workarounds, and working code examples.
\ Java is a trademark or registered trademark of $(FULL_COMPANY_NAME) in \ the US and other countries.
\ - Copyright \ - © 1993, $(COPYRIGHT_YEAR), $(FULL_COMPANY_NAME), \ - $(COMPANY_ADDRESS).
All rights reserved. \ - Use is subject to license terms and the \ - documentation redistribution policy. \ - $(DRAFT_MARKER_STR) + $(call COPYRIGHT_BOTTOM, {@docroot}/../) JAVADOC_TOP := \
' with '/' # and ';' with '/g" -e "s/', and adjusting for edge cases. + # '&' has special meaning in sed so needs to be escaped. $1_REPLACEMENTS_COMMAND_LINE := $(SED) -e 's$$($1_SEP)$$(subst $$(SPACE);$$(SPACE),$$($1_SEP)g' \ -e 's$$($1_SEP),$$(subst $$(SPACE)=>$$(SPACE),$$($1_SEP),$$(subst $$(SPACE)=>$$(SPACE);$$(SPACE),$$($1_SEP)$$($1_SEP)g' \ - -e 's$$($1_SEP),$$(strip $$($1_REPLACEMENTS)))))$$($1_SEP)g' + -e 's$$($1_SEP),$$(subst &,\&,$$(strip $$($1_REPLACEMENTS))))))$$($1_SEP)g' else # We don't have any replacements, just pipe the file through cat. $1_REPLACEMENTS_COMMAND_LINE := $(CAT) diff --git a/src/hotspot/share/prims/jvmti.xml b/src/hotspot/share/prims/jvmti.xml index 51fb8567134..b55c108275b 100644 --- a/src/hotspot/share/prims/jvmti.xml +++ b/src/hotspot/share/prims/jvmti.xml @@ -1,7 +1,7 @@ - - @@ -367,10 +365,6 @@ JVM Tool Interface - - Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. - - The JVM Tool Interface () is a programming interface used by development and monitoring tools. diff --git a/src/hotspot/share/prims/jvmti.xsl b/src/hotspot/share/prims/jvmti.xsl index d4a49b8f0f7..8ab9f170a47 100644 --- a/src/hotspot/share/prims/jvmti.xsl +++ b/src/hotspot/share/prims/jvmti.xsl @@ -1,6 +1,6 @@ - + - @@ -119,7 +119,7 @@
  • Data Types
  • - @@ -220,12 +219,6 @@ - -

    - -

    -
    -

    @@ -286,7 +279,7 @@ - types: @@ -304,9 +297,9 @@ - + - flags and constants: @@ -322,7 +315,7 @@
    - +
    @@ -334,7 +327,7 @@ - +
    @@ -505,7 +498,7 @@ This function may be called from the callbacks to the Heap iteration functions, or from the - event handlers for the + event handlers for the GarbageCollectionStart, GarbageCollectionFinish, and ObjectFree events. @@ -536,7 +529,7 @@ - , + , @@ -932,7 +925,7 @@ typedef struct { - + @@ -949,12 +942,12 @@ typedef struct { - +
                 
    -          
    +
    @@ -977,7 +970,7 @@ typedef struct { - + @@ -994,7 +987,7 @@ typedef struct { - + @@ -1010,7 +1003,7 @@ typedef struct { - + @@ -1038,7 +1031,7 @@ typedef struct {
      - +
    • # @@ -1064,7 +1057,7 @@ typedef struct { The Java™ Virtual Machine Specification - , Chapter + , Chapter @@ -1155,18 +1148,18 @@ typedef struct { Optional Functionality: might not be implemented for all - virtual machines. + virtual machines. - The following capability + The following capability One of the following capabilities - (as returned by + (as returned by GetCapabilities) - must be true to use this + must be true to use this function. @@ -1322,8 +1315,8 @@ typedef struct { - This function returns either a - universal error + This function returns either a + universal error or one of the following errors @@ -1342,7 +1335,7 @@ typedef struct { - This function returns a + This function returns a universal error @@ -1370,7 +1363,7 @@ typedef struct { #jvmtiCapabilities. - + . Use AddCapabilities. @@ -1412,7 +1405,7 @@ typedef struct { - + @@ -1439,7 +1432,7 @@ typedef struct { - + @@ -1684,7 +1677,7 @@ typedef struct { - + @@ -1858,7 +1851,7 @@ typedef struct {
      -          void *reserved        
      +          void *reserved
                 
                 ;
               
      From c222e1aad05926a5ff43ede0348253a234ff2bc6 Mon Sep 17 00:00:00 2001 From: Aleksey Shipilev Date: Tue, 7 May 2019 21:53:38 +0200 Subject: [PATCH 2/7] 8223446: Shenandoah breaks alignment with some HumongousThreshold values Reviewed-by: rkennke --- .../gc/shenandoah/shenandoahHeapRegion.cpp | 12 +-- .../shenandoahHeapRegion.inline.hpp | 6 +- .../gc/shenandoah/TestHumongousThreshold.java | 75 +++++++++++++++++-- 3 files changed, 81 insertions(+), 12 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp index c6ef5cbaa08..f40958d73b5 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp @@ -619,6 +619,7 @@ void ShenandoahHeapRegion::setup_sizes(size_t initial_heap_size, size_t max_heap guarantee(HumongousThresholdWords == 0, "we should only set it once"); HumongousThresholdWords = RegionSizeWords * ShenandoahHumongousThreshold / 100; + HumongousThresholdWords = align_down(HumongousThresholdWords, MinObjAlignment); assert (HumongousThresholdWords <= RegionSizeWords, "sanity"); guarantee(HumongousThresholdBytes == 0, "we should only set it once"); @@ -643,12 +644,13 @@ void ShenandoahHeapRegion::setup_sizes(size_t initial_heap_size, size_t max_heap // // The whole thing is mitigated if Elastic TLABs are enabled. // - guarantee(MaxTLABSizeBytes == 0, "we should only set it once"); - MaxTLABSizeBytes = MIN2(ShenandoahElasticTLAB ? RegionSizeBytes : (RegionSizeBytes / 8), HumongousThresholdBytes); - assert (MaxTLABSizeBytes > MinTLABSize, "should be larger"); - guarantee(MaxTLABSizeWords == 0, "we should only set it once"); - MaxTLABSizeWords = MaxTLABSizeBytes / HeapWordSize; + MaxTLABSizeWords = MIN2(ShenandoahElasticTLAB ? 
RegionSizeWords : (RegionSizeWords / 8), HumongousThresholdWords); + MaxTLABSizeWords = align_down(MaxTLABSizeWords, MinObjAlignment); + + guarantee(MaxTLABSizeBytes == 0, "we should only set it once"); + MaxTLABSizeBytes = MaxTLABSizeWords * HeapWordSize; + assert (MaxTLABSizeBytes > MinTLABSize, "should be larger"); log_info(gc, init)("Regions: " SIZE_FORMAT " x " SIZE_FORMAT "%s", RegionCount, byte_size_in_proper_unit(RegionSizeBytes), proper_unit_for_byte_size(RegionSizeBytes)); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp index d818aa1254f..063d710b906 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp @@ -32,6 +32,8 @@ HeapWord* ShenandoahHeapRegion::allocate(size_t size, ShenandoahAllocRequest::Type type) { _heap->assert_heaplock_or_safepoint(); + assert(is_object_aligned(size), "alloc size breaks alignment: " SIZE_FORMAT, size); + HeapWord* obj = top(); if (pointer_delta(end(), obj) >= size) { make_regular_allocation(); @@ -39,7 +41,9 @@ HeapWord* ShenandoahHeapRegion::allocate(size_t size, ShenandoahAllocRequest::Ty HeapWord* new_top = obj + size; set_top(new_top); - assert(is_aligned(obj) && is_aligned(new_top), "checking alignment"); + + assert(is_object_aligned(new_top), "new top breaks alignment: " PTR_FORMAT, p2i(new_top)); + assert(is_object_aligned(obj), "obj is not aligned: " PTR_FORMAT, p2i(obj)); return obj; } else { diff --git a/test/hotspot/jtreg/gc/shenandoah/TestHumongousThreshold.java b/test/hotspot/jtreg/gc/shenandoah/TestHumongousThreshold.java index 6e1fa5333dc..3660afdbaa9 100644 --- a/test/hotspot/jtreg/gc/shenandoah/TestHumongousThreshold.java +++ b/test/hotspot/jtreg/gc/shenandoah/TestHumongousThreshold.java @@ -26,13 +26,76 @@ * @key gc * @requires vm.gc.Shenandoah * - * @run main/othervm -XX:+UnlockDiagnosticVMOptions 
-XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g -XX:+ShenandoahVerify TestHumongousThreshold - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g -XX:ShenandoahHumongousThreshold=50 -XX:+ShenandoahVerify TestHumongousThreshold - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g -XX:ShenandoahHumongousThreshold=90 -XX:+ShenandoahVerify TestHumongousThreshold - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g -XX:ShenandoahHumongousThreshold=99 -XX:+ShenandoahVerify TestHumongousThreshold - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g -XX:ShenandoahHumongousThreshold=100 -XX:+ShenandoahVerify TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:+ShenandoahVerify + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:+ShenandoahVerify -XX:ShenandoahHumongousThreshold=50 + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:+ShenandoahVerify -XX:ShenandoahHumongousThreshold=90 + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:+ShenandoahVerify -XX:ShenandoahHumongousThreshold=99 + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:+ShenandoahVerify -XX:ShenandoahHumongousThreshold=100 + * TestHumongousThreshold * - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g 
-XX:ShenandoahHumongousThreshold=90 -XX:ShenandoahGCHeuristics=aggressive TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:-UseTLAB -XX:+ShenandoahVerify + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:-UseTLAB -XX:+ShenandoahVerify -XX:ShenandoahHumongousThreshold=50 + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:-UseTLAB -XX:+ShenandoahVerify -XX:ShenandoahHumongousThreshold=90 + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:-UseTLAB -XX:+ShenandoahVerify -XX:ShenandoahHumongousThreshold=99 + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:-UseTLAB -XX:+ShenandoahVerify -XX:ShenandoahHumongousThreshold=100 + * TestHumongousThreshold + * + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:ObjectAlignmentInBytes=16 -XX:+ShenandoahVerify + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:ObjectAlignmentInBytes=16 -XX:+ShenandoahVerify -XX:ShenandoahHumongousThreshold=50 + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:ObjectAlignmentInBytes=16 -XX:+ShenandoahVerify -XX:ShenandoahHumongousThreshold=90 + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:ObjectAlignmentInBytes=16 -XX:+ShenandoahVerify 
-XX:ShenandoahHumongousThreshold=99 + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:ObjectAlignmentInBytes=16 -XX:+ShenandoahVerify -XX:ShenandoahHumongousThreshold=100 + * TestHumongousThreshold + * + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:-UseTLAB -XX:ObjectAlignmentInBytes=16 -XX:+ShenandoahVerify + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:-UseTLAB -XX:ObjectAlignmentInBytes=16 -XX:+ShenandoahVerify -XX:ShenandoahHumongousThreshold=50 + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:-UseTLAB -XX:ObjectAlignmentInBytes=16 -XX:+ShenandoahVerify -XX:ShenandoahHumongousThreshold=90 + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:-UseTLAB -XX:ObjectAlignmentInBytes=16 -XX:+ShenandoahVerify -XX:ShenandoahHumongousThreshold=99 + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:-UseTLAB -XX:ObjectAlignmentInBytes=16 -XX:+ShenandoahVerify -XX:ShenandoahHumongousThreshold=100 + * TestHumongousThreshold + * + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:ShenandoahHumongousThreshold=90 -XX:ShenandoahGCHeuristics=aggressive + * TestHumongousThreshold + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -Xmx1g + * -XX:-UseTLAB -XX:ShenandoahHumongousThreshold=90 -XX:ShenandoahGCHeuristics=aggressive + * TestHumongousThreshold */ import java.util.Random; From 
0284208ab3c39bc2972c5446c669436b2e185b1f Mon Sep 17 00:00:00 2001 From: Aleksey Shipilev Date: Tue, 7 May 2019 21:53:46 +0200 Subject: [PATCH 3/7] 8223447: Stabilize gc/shenandoah/TestStringDedupStress test Reviewed-by: rkennke --- .../gc/shenandoah/TestStringDedupStress.java | 73 ++++++++++++------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/test/hotspot/jtreg/gc/shenandoah/TestStringDedupStress.java b/test/hotspot/jtreg/gc/shenandoah/TestStringDedupStress.java index 820921ac824..72182e8467f 100644 --- a/test/hotspot/jtreg/gc/shenandoah/TestStringDedupStress.java +++ b/test/hotspot/jtreg/gc/shenandoah/TestStringDedupStress.java @@ -30,60 +30,74 @@ * @modules java.base/jdk.internal.misc:open * @modules java.base/java.lang:open * java.management - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx512M -Xlog:gc+stats + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx1g -Xlog:gc+stats * -DtargetStrings=3000000 + * -Xlog:gc * TestStringDedupStress * - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx512M -Xlog:gc+stats + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx1g -Xlog:gc+stats * -XX:ShenandoahGCHeuristics=aggressive -DtargetStrings=2000000 + * -Xlog:gc * TestStringDedupStress * - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx512M -Xlog:gc+stats + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx1g -Xlog:gc+stats * -XX:ShenandoahGCHeuristics=aggressive -XX:+ShenandoahOOMDuringEvacALot -DtargetStrings=2000000 + * 
-Xlog:gc * TestStringDedupStress * - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx512M -Xlog:gc+stats + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx1g -Xlog:gc+stats * -XX:ShenandoahGCHeuristics=static -DtargetStrings=4000000 + * -Xlog:gc * TestStringDedupStress * - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx512M -Xlog:gc+stats + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx1g -Xlog:gc+stats * -XX:ShenandoahGCHeuristics=compact + * -Xlog:gc * TestStringDedupStress * - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx512M -Xlog:gc+stats - * -XX:ShenandoahGCHeuristics=passive -XX:+ShenandoahDegeneratedGC -DtargetOverwrites=40000000 + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx1g -Xlog:gc+stats + * -XX:ShenandoahGCHeuristics=passive -XX:+ShenandoahDegeneratedGC + * -Xlog:gc * TestStringDedupStress * - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx512M -Xlog:gc+stats - * -XX:ShenandoahGCHeuristics=passive -XX:-ShenandoahDegeneratedGC -DtargetOverwrites=40000000 + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx1g -Xlog:gc+stats + * -XX:ShenandoahGCHeuristics=passive -XX:-ShenandoahDegeneratedGC + * -Xlog:gc * TestStringDedupStress * - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC 
-XX:+UseStringDeduplication -Xmx512M -Xlog:gc+stats + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx1g -Xlog:gc+stats * -XX:ShenandoahGCHeuristics=traversal + * -Xlog:gc * TestStringDedupStress * - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx512M -Xlog:gc+stats + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx1g -Xlog:gc+stats * -XX:ShenandoahUpdateRefsEarly=off -DtargetStrings=3000000 + * -Xlog:gc * TestStringDedupStress * - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx512M -Xlog:gc+stats + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx1g -Xlog:gc+stats * -XX:ShenandoahGCHeuristics=compact -XX:ShenandoahUpdateRefsEarly=off -DtargetStrings=2000000 + * -Xlog:gc * TestStringDedupStress * - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx512M -Xlog:gc+stats + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx1g -Xlog:gc+stats * -XX:ShenandoahGCHeuristics=aggressive -XX:ShenandoahUpdateRefsEarly=off -DtargetStrings=2000000 + * -Xlog:gc * TestStringDedupStress * - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx512M -Xlog:gc+stats - * -XX:ShenandoahGCHeuristics=static -XX:ShenandoahUpdateRefsEarly=off -DtargetOverwrites=4000000 + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication 
-Xmx1g -Xlog:gc+stats + * -XX:ShenandoahGCHeuristics=static -XX:ShenandoahUpdateRefsEarly=off + * -Xlog:gc * TestStringDedupStress * - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx512M -Xlog:gc+stats + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx1g -Xlog:gc+stats * -XX:ShenandoahGCHeuristics=aggressive -XX:ShenandoahUpdateRefsEarly=off -XX:+ShenandoahOOMDuringEvacALot -DtargetStrings=2000000 + * -Xlog:gc * TestStringDedupStress * - * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx512M -Xlog:gc+stats + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseShenandoahGC -XX:+UseStringDeduplication -Xmx1g -Xlog:gc+stats * -XX:ShenandoahGCHeuristics=traversal -XX:+ShenandoahOOMDuringEvacALot -DtargetStrings=2000000 + * -Xlog:gc * TestStringDedupStress */ @@ -97,9 +111,9 @@ public class TestStringDedupStress { private static Field valueField; private static Unsafe unsafe; - private static long TARGET_STRINGS = Long.getLong("targetStrings", 2_500_000); - private static long TARGET_OVERWRITES = Long.getLong("targetOverwrites", 600_000); + private static final int TARGET_STRINGS = Integer.getInteger("targetStrings", 2_500_000); private static final long MAX_REWRITE_GC_CYCLES = 6; + private static final long MAX_REWRITE_TIME = 30*1000; // ms private static final int UNIQUE_STRINGS = 20; @@ -151,8 +165,8 @@ public class TestStringDedupStress { } } - private static int verifyDedepString(ArrayList strs) { - HashMap seen = new HashMap<>(); + private static int verifyDedupString(ArrayList strs) { + Map seen = new HashMap<>(TARGET_STRINGS*2); int total = 0; int dedup = 0; @@ -195,14 +209,16 @@ public class TestStringDedupStress { } // Generate roughly TARGET_STRINGS strings, 
only UNIQUE_STRINGS are unique - long genIters = TARGET_STRINGS / UNIQUE_STRINGS; - for (long index = 0; index < genIters; index++) { + int genIters = TARGET_STRINGS / UNIQUE_STRINGS; + for (int index = 0; index < genIters; index++) { generateStrings(astrs, UNIQUE_STRINGS); } long cycleBeforeRewrite = gcCycleMBean.getCollectionCount(); + long timeBeforeRewrite = System.currentTimeMillis(); - for (long loop = 1; loop < TARGET_OVERWRITES; loop++) { + long loop = 1; + while (true) { int arrSize = astrs.size(); int index = rn.nextInt(arrSize); StringAndId item = astrs.get(index); @@ -210,13 +226,18 @@ public class TestStringDedupStress { item.str = "Unique String " + n; item.id = n; - if (loop % 1000 == 0) { + if (loop++ % 1000 == 0) { // enough GC cycles for rewritten strings to be deduplicated if (gcCycleMBean.getCollectionCount() - cycleBeforeRewrite >= MAX_REWRITE_GC_CYCLES) { break; } + + // enough time is spent waiting for GC to happen + if (System.currentTimeMillis() - timeBeforeRewrite >= MAX_REWRITE_TIME) { + break; + } } } - verifyDedepString(astrs); + verifyDedupString(astrs); } } From 707c30fae6616fa603a0b45aae749b2fe137db5f Mon Sep 17 00:00:00 2001 From: Sandhya Viswanathan Date: Tue, 7 May 2019 13:33:27 -0700 Subject: [PATCH 4/7] 8222074: Enhance auto vectorization for x86 Reviewed-by: kvn, vlivanov --- src/hotspot/cpu/x86/assembler_x86.cpp | 111 +- src/hotspot/cpu/x86/assembler_x86.hpp | 16 + src/hotspot/cpu/x86/macroAssembler_x86.cpp | 214 +- src/hotspot/cpu/x86/macroAssembler_x86.hpp | 42 +- src/hotspot/cpu/x86/stubGenerator_x86_32.cpp | 60 + src/hotspot/cpu/x86/stubGenerator_x86_64.cpp | 41 + src/hotspot/cpu/x86/stubRoutines_x86.cpp | 7 + src/hotspot/cpu/x86/stubRoutines_x86.hpp | 35 + src/hotspot/cpu/x86/x86.ad | 1802 ++++++++--------- src/hotspot/cpu/x86/x86_32.ad | 22 + src/hotspot/cpu/x86/x86_64.ad | 46 + src/hotspot/share/adlc/formssel.cpp | 6 +- src/hotspot/share/classfile/vmSymbols.cpp | 9 + src/hotspot/share/classfile/vmSymbols.hpp | 4 + 
src/hotspot/share/opto/c2compiler.cpp | 3 + src/hotspot/share/opto/classes.hpp | 6 + src/hotspot/share/opto/library_call.cpp | 30 +- src/hotspot/share/opto/subnode.hpp | 11 + src/hotspot/share/opto/superword.cpp | 1 + src/hotspot/share/opto/vectornode.cpp | 21 +- src/hotspot/share/opto/vectornode.hpp | 40 + src/hotspot/share/runtime/vmStructs.cpp | 7 + .../share/classes/java/lang/Math.java | 3 + .../hotspot/test/CheckGraalIntrinsics.java | 3 + .../compiler/c2/cr6340864/TestDoubleVect.java | 23 + .../compiler/c2/cr6340864/TestFloatVect.java | 25 + .../compiler/c2/cr6340864/TestIntVect.java | 58 + 27 files changed, 1632 insertions(+), 1014 deletions(-) diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 9920147e68e..f32f91b28aa 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -1894,6 +1894,69 @@ void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) { emit_int8((unsigned char)(0xC0 | encode)); } +void Assembler::pabsb(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_ssse3(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x1C); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::pabsw(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_ssse3(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x1D); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::pabsd(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_ssse3(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* 
legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x1E); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::vpabsb(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit? VM_Version::supports_avx() : + vector_len == AVX_256bit? VM_Version::supports_avx2() : + vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x1C); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::vpabsw(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit? VM_Version::supports_avx() : + vector_len == AVX_256bit? VM_Version::supports_avx2() : + vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x1D); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::vpabsd(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit? VM_Version::supports_avx() : + vector_len == AVX_256bit? VM_Version::supports_avx2() : + vector_len == AVX_512bit? 
VM_Version::supports_evex() : 0, ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x1E); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::evpabsq(XMMRegister dst, XMMRegister src, int vector_len) { + assert(UseAVX > 2, ""); + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x1F); + emit_int8((unsigned char)(0xC0 | encode)); +} + void Assembler::decl(Address dst) { // Don't use it directly. Use MacroAssembler::decrement() instead. InstructionMark im(this); @@ -3416,10 +3479,19 @@ void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_le InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x00); - emit_int8(0xC0 | encode); + emit_int8((unsigned char)(0xC0 | encode)); emit_int8(imm8); } +void Assembler::vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 2, "requires AVX512F"); + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x36); + emit_int8((unsigned char)(0xC0 | encode)); +} + void Assembler::vperm2i128(XMMRegister dst, 
XMMRegister nds, XMMRegister src, int imm8) { assert(VM_Version::supports_avx2(), ""); InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); @@ -3884,6 +3956,14 @@ void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { emit_int8((unsigned char)(0xC0 | encode)); } +void Assembler::pmovsxbw(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x20); + emit_int8((unsigned char)(0xC0 | encode)); +} + void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); InstructionMark im(this); @@ -3905,6 +3985,15 @@ void Assembler::vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { emit_int8((unsigned char) (0xC0 | encode)); } +void Assembler::vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit? VM_Version::supports_avx() : + vector_len == AVX_256bit? VM_Version::supports_avx2() : + vector_len == AVX_512bit? 
VM_Version::supports_avx512bw() : 0, ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x20); + emit_int8((unsigned char)(0xC0 | encode)); +} void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) { assert(VM_Version::supports_avx512vlbw(), ""); @@ -6277,6 +6366,26 @@ void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int emit_int8((unsigned char)(0xC0 | encode)); } +void Assembler::evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len) { + assert(UseAVX > 2, "requires AVX512"); + assert ((VM_Version::supports_avx512vl() || vector_len == 2), "requires AVX512vl"); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0x72); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(shift & 0xFF); +} + +void Assembler::evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { + assert(UseAVX > 2, "requires AVX512"); + assert ((VM_Version::supports_avx512vl() || vector_len == 2), "requires AVX512vl"); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xE2); + emit_int8((unsigned char)(0xC0 | encode)); +} // logical operations packed integers void Assembler::pand(XMMRegister dst, XMMRegister src) { diff --git 
a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 51a7c908ed0..c6d2d1f129e 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -1102,6 +1102,15 @@ private: void cvttpd2dq(XMMRegister dst, XMMRegister src); + //Abs of packed Integer values + void pabsb(XMMRegister dst, XMMRegister src); + void pabsw(XMMRegister dst, XMMRegister src); + void pabsd(XMMRegister dst, XMMRegister src); + void vpabsb(XMMRegister dst, XMMRegister src, int vector_len); + void vpabsw(XMMRegister dst, XMMRegister src, int vector_len); + void vpabsd(XMMRegister dst, XMMRegister src, int vector_len); + void evpabsq(XMMRegister dst, XMMRegister src, int vector_len); + // Divide Scalar Double-Precision Floating-Point Values void divsd(XMMRegister dst, Address src); void divsd(XMMRegister dst, XMMRegister src); @@ -1589,6 +1598,7 @@ private: // Pemutation of 64bit words void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len); void vpermq(XMMRegister dst, XMMRegister src, int imm8); + void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); @@ -1668,6 +1678,10 @@ private: void evpmovdb(Address dst, XMMRegister src, int vector_len); + // Sign extend moves + void pmovsxbw(XMMRegister dst, XMMRegister src); + void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len); + // Multiply add void pmaddwd(XMMRegister dst, XMMRegister src); void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); @@ -2094,6 +2108,8 @@ private: void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len); void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); void vpsrad(XMMRegister dst, XMMRegister src, 
XMMRegister shift, int vector_len); + void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len); + void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); // And packed integers void pand(XMMRegister dst, XMMRegister src); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 0c5190abf07..eeee30ee39a 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -1003,25 +1003,25 @@ void MacroAssembler::align(int modulus, int target) { } } -void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { +void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) { // Used in sign-masking with aligned address. assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); if (reachable(src)) { Assembler::andpd(dst, as_Address(src)); } else { - lea(rscratch1, src); - Assembler::andpd(dst, Address(rscratch1, 0)); + lea(scratch_reg, src); + Assembler::andpd(dst, Address(scratch_reg, 0)); } } -void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) { +void MacroAssembler::andps(XMMRegister dst, AddressLiteral src, Register scratch_reg) { // Used in sign-masking with aligned address. 
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); if (reachable(src)) { Assembler::andps(dst, as_Address(src)); } else { - lea(rscratch1, src); - Assembler::andps(dst, Address(rscratch1, 0)); + lea(scratch_reg, src); + Assembler::andps(dst, Address(scratch_reg, 0)); } } @@ -3340,13 +3340,13 @@ void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) { Assembler::vmovdqu(dst, src); } -void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src) { +void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) { if (reachable(src)) { vmovdqu(dst, as_Address(src)); } else { - lea(rscratch1, src); - vmovdqu(dst, Address(rscratch1, 0)); + lea(scratch_reg, src); + vmovdqu(dst, Address(scratch_reg, 0)); } } @@ -3698,14 +3698,14 @@ void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { } } -void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { +void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) { // Used in sign-bit flipping with aligned address. assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); if (reachable(src)) { Assembler::xorpd(dst, as_Address(src)); } else { - lea(rscratch1, src); - Assembler::xorpd(dst, Address(rscratch1, 0)); + lea(scratch_reg, src); + Assembler::xorpd(dst, Address(scratch_reg, 0)); } } @@ -3726,14 +3726,14 @@ void MacroAssembler::xorps(XMMRegister dst, XMMRegister src) { } } -void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { +void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg) { // Used in sign-bit flipping with aligned address. 
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); if (reachable(src)) { Assembler::xorps(dst, as_Address(src)); } else { - lea(rscratch1, src); - Assembler::xorps(dst, Address(rscratch1, 0)); + lea(scratch_reg, src); + Assembler::xorps(dst, Address(scratch_reg, 0)); } } @@ -3799,12 +3799,12 @@ void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int v Assembler::vpaddw(dst, nds, src, vector_len); } -void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) { +void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { if (reachable(src)) { Assembler::vpand(dst, nds, as_Address(src), vector_len); } else { - lea(rscratch1, src); - Assembler::vpand(dst, nds, Address(rscratch1, 0), vector_len); + lea(scratch_reg, src); + Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len); } } @@ -3873,6 +3873,22 @@ void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vec Assembler::vpsraw(dst, nds, shift, vector_len); } +void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { + assert(UseAVX > 2,""); + if (!VM_Version::supports_avx512vl() && vector_len < 2) { + vector_len = 2; + } + Assembler::evpsraq(dst, nds, shift, vector_len); +} + +void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len) { + assert(UseAVX > 2,""); + if (!VM_Version::supports_avx512vl() && vector_len < 2) { + vector_len = 2; + } + Assembler::evpsraq(dst, nds, shift, vector_len); +} + void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15"); Assembler::vpsrlw(dst, nds, shift, vector_len); @@ -3913,21 +3929,21 @@ void 
MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { Assembler::pshuflw(dst, src, mode); } -void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) { +void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { if (reachable(src)) { vandpd(dst, nds, as_Address(src), vector_len); } else { - lea(rscratch1, src); - vandpd(dst, nds, Address(rscratch1, 0), vector_len); + lea(scratch_reg, src); + vandpd(dst, nds, Address(scratch_reg, 0), vector_len); } } -void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) { +void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { if (reachable(src)) { vandps(dst, nds, as_Address(src), vector_len); } else { - lea(rscratch1, src); - vandps(dst, nds, Address(rscratch1, 0), vector_len); + lea(scratch_reg, src); + vandps(dst, nds, Address(scratch_reg, 0), vector_len); } } @@ -3995,24 +4011,162 @@ void MacroAssembler::vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral vxorpd(dst, nds, src, Assembler::AVX_128bit); } -void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) { +void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { if (reachable(src)) { vxorpd(dst, nds, as_Address(src), vector_len); } else { - lea(rscratch1, src); - vxorpd(dst, nds, Address(rscratch1, 0), vector_len); + lea(scratch_reg, src); + vxorpd(dst, nds, Address(scratch_reg, 0), vector_len); } } -void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) { +void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { if (reachable(src)) { vxorps(dst, nds, as_Address(src), vector_len); } else { - lea(rscratch1, src); - vxorps(dst, 
nds, Address(rscratch1, 0), vector_len); + lea(scratch_reg, src); + vxorps(dst, nds, Address(scratch_reg, 0), vector_len); } } +void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { + if (UseAVX > 1 || (vector_len < 1)) { + if (reachable(src)) { + Assembler::vpxor(dst, nds, as_Address(src), vector_len); + } else { + lea(scratch_reg, src); + Assembler::vpxor(dst, nds, Address(scratch_reg, 0), vector_len); + } + } + else { + MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg); + } +} + +//------------------------------------------------------------------------------------------- +#ifdef COMPILER2 +// Generic instructions support for use in .ad files C2 code generation + +void MacroAssembler::vabsnegd(int opcode, XMMRegister dst, Register scr) { + if (opcode == Op_AbsVD) { + andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr); + } else { + assert((opcode == Op_NegVD),"opcode should be Op_NegD"); + xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr); + } +} + +void MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) { + if (opcode == Op_AbsVD) { + vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, scr); + } else { + assert((opcode == Op_NegVD),"opcode should be Op_NegD"); + vxorpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), vector_len, scr); + } +} + +void MacroAssembler::vabsnegf(int opcode, XMMRegister dst, Register scr) { + if (opcode == Op_AbsVF) { + andps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), scr); + } else { + assert((opcode == Op_NegVF),"opcode should be Op_NegF"); + xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scr); + } +} + +void MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) { + if (opcode == Op_AbsVF) { 
+ vandps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), vector_len, scr); + } else { + assert((opcode == Op_NegVF),"opcode should be Op_NegF"); + vxorps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), vector_len, scr); + } +} + +void MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src) { + if (sign) { + pmovsxbw(dst, src); + } else { + pmovzxbw(dst, src); + } +} + +void MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len) { + if (sign) { + vpmovsxbw(dst, src, vector_len); + } else { + vpmovzxbw(dst, src, vector_len); + } +} + +void MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src) { + if (opcode == Op_RShiftVI) { + psrad(dst, src); + } else if (opcode == Op_LShiftVI) { + pslld(dst, src); + } else { + assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI"); + psrld(dst, src); + } +} + +void MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + if (opcode == Op_RShiftVI) { + vpsrad(dst, nds, src, vector_len); + } else if (opcode == Op_LShiftVI) { + vpslld(dst, nds, src, vector_len); + } else { + assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI"); + vpsrld(dst, nds, src, vector_len); + } +} + +void MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister src) { + if ((opcode == Op_RShiftVS) || (opcode == Op_RShiftVB)) { + psraw(dst, src); + } else if ((opcode == Op_LShiftVS) || (opcode == Op_LShiftVB)) { + psllw(dst, src); + } else { + assert(((opcode == Op_URShiftVS) || (opcode == Op_URShiftVB)),"opcode should be one of Op_URShiftVS or Op_URShiftVB"); + psrlw(dst, src); + } +} + +void MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + if ((opcode == Op_RShiftVS) || (opcode == Op_RShiftVB)) { + vpsraw(dst, nds, src, vector_len); + } else if ((opcode == Op_LShiftVS) || (opcode == Op_LShiftVB)) { + vpsllw(dst, 
nds, src, vector_len); + } else { + assert(((opcode == Op_URShiftVS) || (opcode == Op_URShiftVB)),"opcode should be one of Op_URShiftVS or Op_URShiftVB"); + vpsrlw(dst, nds, src, vector_len); + } +} + +void MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister src) { + if (opcode == Op_RShiftVL) { + psrlq(dst, src); // using srl to implement sra on pre-avx512 systems + } else if (opcode == Op_LShiftVL) { + psllq(dst, src); + } else { + assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL"); + psrlq(dst, src); + } +} + +void MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + if (opcode == Op_RShiftVL) { + evpsraq(dst, nds, src, vector_len); + } else if (opcode == Op_LShiftVL) { + vpsllq(dst, nds, src, vector_len); + } else { + assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL"); + vpsrlq(dst, nds, src, vector_len); + } +} +#endif +//------------------------------------------------------------------------------------------- + void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 04e9c20bbe3..a5966fb9ddf 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -877,12 +877,12 @@ class MacroAssembler: public Assembler { // Floating void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); } - void andpd(XMMRegister dst, AddressLiteral src); + void andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); void andpd(XMMRegister dst, XMMRegister src) { Assembler::andpd(dst, src); } void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); } void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); } - void 
andps(XMMRegister dst, AddressLiteral src); + void andps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); } void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); } @@ -1066,8 +1066,8 @@ private: // these are private because users should be doing movflt/movdbl - void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); } void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); } + void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); } void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); } void movss(XMMRegister dst, AddressLiteral src); @@ -1105,7 +1105,7 @@ public: void vmovdqu(Address dst, XMMRegister src); void vmovdqu(XMMRegister dst, Address src); void vmovdqu(XMMRegister dst, XMMRegister src); - void vmovdqu(XMMRegister dst, AddressLiteral src); + void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } @@ -1183,12 +1183,12 @@ public: // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values void xorpd(XMMRegister dst, XMMRegister src); void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); } - void xorpd(XMMRegister dst, AddressLiteral src); + void xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values void xorps(XMMRegister dst, XMMRegister src); void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } - void xorps(XMMRegister dst, AddressLiteral src); + void xorps(XMMRegister dst, 
AddressLiteral src, Register scratch_reg = rscratch1); // Shuffle Bytes void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); } @@ -1215,7 +1215,7 @@ public: void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } - void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len); + void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len); void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); } @@ -1241,6 +1241,9 @@ public: void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); + void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); + void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len); + void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len); void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); @@ -1260,11 +1263,11 @@ public: void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); } void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); } - void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len); + void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); } void vandps(XMMRegister dst, XMMRegister nds, Address 
src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); } - void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len); + void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); } void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); } @@ -1297,11 +1300,11 @@ public: void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); } void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); } - void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len); + void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); } void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); } - void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len); + void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2 @@ -1315,6 +1318,7 @@ public: else Assembler::vxorpd(dst, nds, src, vector_len); } + void vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); // Simple version for AVX2 256bit vectors void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); } @@ -1601,6 +1605,22 @@ public: void movl2ptr(Register dst, Address src) { 
LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); } void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); } +#ifdef COMPILER2 + // Generic instructions support for use in .ad files C2 code generation + void vabsnegd(int opcode, XMMRegister dst, Register scr); + void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr); + void vabsnegf(int opcode, XMMRegister dst, Register scr); + void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr); + void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len); + void vextendbw(bool sign, XMMRegister dst, XMMRegister src); + void vshiftd(int opcode, XMMRegister dst, XMMRegister src); + void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vshiftw(int opcode, XMMRegister dst, XMMRegister src); + void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vshiftq(int opcode, XMMRegister dst, XMMRegister src); + void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); +#endif + // C2 compiled method's prolog code. 
void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub); diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp index 0fe161bab91..74824e37984 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp @@ -602,7 +602,59 @@ class StubGenerator: public StubCodeGenerator { return start; } + //--------------------------------------------------------------------------------------------------- + address generate_vector_mask(const char *stub_name, int32_t mask) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", stub_name); + address start = __ pc(); + + for (int i = 0; i < 16; i++) { + __ emit_data(mask, relocInfo::none, 0); + } + + return start; + } + + address generate_vector_mask_long_double(const char *stub_name, int32_t maskhi, int32_t masklo) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", stub_name); + address start = __ pc(); + + for (int i = 0; i < 8; i++) { + __ emit_data(masklo, relocInfo::none, 0); + __ emit_data(maskhi, relocInfo::none, 0); + } + + return start; + } + + //---------------------------------------------------------------------------------------------------- + + address generate_vector_byte_perm_mask(const char *stub_name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", stub_name); + address start = __ pc(); + + __ emit_data(0x00000001, relocInfo::none, 0); + __ emit_data(0x00000000, relocInfo::none, 0); + __ emit_data(0x00000003, relocInfo::none, 0); + __ emit_data(0x00000000, relocInfo::none, 0); + __ emit_data(0x00000005, relocInfo::none, 0); + __ emit_data(0x00000000, relocInfo::none, 0); + __ emit_data(0x00000007, relocInfo::none, 0); + __ emit_data(0x00000000, relocInfo::none, 0); + __ emit_data(0x00000000, relocInfo::none, 0); + __ emit_data(0x00000000, relocInfo::none, 0); + __ emit_data(0x00000002, relocInfo::none, 0); + __ 
emit_data(0x00000000, relocInfo::none, 0); + __ emit_data(0x00000004, relocInfo::none, 0); + __ emit_data(0x00000000, relocInfo::none, 0); + __ emit_data(0x00000006, relocInfo::none, 0); + __ emit_data(0x00000000, relocInfo::none, 0); + + return start; + } //---------------------------------------------------------------------------------------------------- // Non-destructive plausibility checks for oops @@ -3823,6 +3875,14 @@ class StubGenerator: public StubCodeGenerator { //------------------------------------------------------------------------------------------------------------------------ // entry points that are platform specific + StubRoutines::x86::_vector_float_sign_mask = generate_vector_mask("vector_float_sign_mask", 0x7FFFFFFF); + StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask("vector_float_sign_flip", 0x80000000); + StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask_long_double("vector_double_sign_mask", 0x7FFFFFFF, 0xFFFFFFFF); + StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask_long_double("vector_double_sign_flip", 0x80000000, 0x00000000); + StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff); + StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask"); + StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask_long_double("vector_long_sign_mask", 0x80000000, 0x00000000); + // support for verify_oop (must happen after universe_init) StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp index 724f025ec53..a6b269892cb 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -979,6 +979,40 @@ class StubGenerator: public StubCodeGenerator { return start; } + address generate_vector_mask(const char *stub_name, int64_t mask) { 
+ __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", stub_name); + address start = __ pc(); + + __ emit_data64(mask, relocInfo::none); + __ emit_data64(mask, relocInfo::none); + __ emit_data64(mask, relocInfo::none); + __ emit_data64(mask, relocInfo::none); + __ emit_data64(mask, relocInfo::none); + __ emit_data64(mask, relocInfo::none); + __ emit_data64(mask, relocInfo::none); + __ emit_data64(mask, relocInfo::none); + + return start; + } + + address generate_vector_byte_perm_mask(const char *stub_name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", stub_name); + address start = __ pc(); + + __ emit_data64(0x0000000000000001, relocInfo::none); + __ emit_data64(0x0000000000000003, relocInfo::none); + __ emit_data64(0x0000000000000005, relocInfo::none); + __ emit_data64(0x0000000000000007, relocInfo::none); + __ emit_data64(0x0000000000000000, relocInfo::none); + __ emit_data64(0x0000000000000002, relocInfo::none); + __ emit_data64(0x0000000000000004, relocInfo::none); + __ emit_data64(0x0000000000000006, relocInfo::none); + + return start; + } + // Non-destructive plausibility checks for oops // // Arguments: @@ -5871,6 +5905,13 @@ address generate_avx_ghash_processBlocks() { StubRoutines::x86::_float_sign_flip = generate_fp_mask("float_sign_flip", 0x8000000080000000); StubRoutines::x86::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF); StubRoutines::x86::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000); + StubRoutines::x86::_vector_float_sign_mask = generate_vector_mask("vector_float_sign_mask", 0x7FFFFFFF7FFFFFFF); + StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask("vector_float_sign_flip", 0x8000000080000000); + StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask("vector_double_sign_mask", 0x7FFFFFFFFFFFFFFF); + StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask("vector_double_sign_flip", 0x8000000000000000); + 
StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff00ff00ff); + StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask"); + StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask("vector_long_sign_mask", 0x8000000000000000); // support for verify_oop (must happen after universe_init) StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.cpp b/src/hotspot/cpu/x86/stubRoutines_x86.cpp index 519f12273c4..e9bc44739ff 100644 --- a/src/hotspot/cpu/x86/stubRoutines_x86.cpp +++ b/src/hotspot/cpu/x86/stubRoutines_x86.cpp @@ -43,6 +43,13 @@ address StubRoutines::x86::_ghash_shuffmask_addr = NULL; address StubRoutines::x86::_upper_word_mask_addr = NULL; address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL; address StubRoutines::x86::_k256_adr = NULL; +address StubRoutines::x86::_vector_short_to_byte_mask = NULL; +address StubRoutines::x86::_vector_float_sign_mask = NULL; +address StubRoutines::x86::_vector_float_sign_flip = NULL; +address StubRoutines::x86::_vector_double_sign_mask = NULL; +address StubRoutines::x86::_vector_double_sign_flip = NULL; +address StubRoutines::x86::_vector_byte_perm_mask = NULL; +address StubRoutines::x86::_vector_long_sign_mask = NULL; #ifdef _LP64 address StubRoutines::x86::_k256_W_adr = NULL; address StubRoutines::x86::_k512_W_addr = NULL; diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.hpp b/src/hotspot/cpu/x86/stubRoutines_x86.hpp index b29e90ecce3..eec99d798fe 100644 --- a/src/hotspot/cpu/x86/stubRoutines_x86.hpp +++ b/src/hotspot/cpu/x86/stubRoutines_x86.hpp @@ -102,6 +102,7 @@ class x86 { static address double_sign_flip() { return _double_sign_flip; } + #else // !LP64 private: @@ -139,6 +140,13 @@ class x86 { //k256 table for sha256 static juint _k256[]; static address _k256_adr; + static address _vector_short_to_byte_mask; + static address 
_vector_float_sign_mask; + static address _vector_float_sign_flip; + static address _vector_double_sign_mask; + static address _vector_double_sign_flip; + static address _vector_byte_perm_mask; + static address _vector_long_sign_mask; #ifdef _LP64 static juint _k256_W[]; static address _k256_W_adr; @@ -212,6 +220,33 @@ class x86 { static address upper_word_mask_addr() { return _upper_word_mask_addr; } static address shuffle_byte_flip_mask_addr() { return _shuffle_byte_flip_mask_addr; } static address k256_addr() { return _k256_adr; } + + static address vector_short_to_byte_mask() { + return _vector_short_to_byte_mask; + } + static address vector_float_sign_mask() { + return _vector_float_sign_mask; + } + + static address vector_float_sign_flip() { + return _vector_float_sign_flip; + } + + static address vector_double_sign_mask() { + return _vector_double_sign_mask; + } + + static address vector_double_sign_flip() { + return _vector_double_sign_flip; + } + + static address vector_byte_perm_mask() { + return _vector_byte_perm_mask; + } + + static address vector_long_sign_mask() { + return _vector_long_sign_mask; + } #ifdef _LP64 static address k256_W_addr() { return _k256_W_adr; } static address k512_W_addr() { return _k512_W_addr; } diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index da61baecee5..0fec2c8e18f 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -1372,14 +1372,20 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { static address double_signmask() { return (address)double_signmask_pool; } static address double_signflip() { return (address)double_signflip_pool; } #endif + static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } + static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } + static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } - 
+//============================================================================= const bool Matcher::match_rule_supported(int opcode) { if (!has_match_rule(opcode)) return false; bool ret_value = true; switch (opcode) { + case Op_AbsVL: // needs UseAVX >= 3; must not fall through into the PopCount check + ret_value = (UseAVX >= 3); + break; case Op_PopCountI: case Op_PopCountL: if (!UsePopCountInstruction) @@ -1402,6 +1408,9 @@ const bool Matcher::match_rule_supported(int opcode) { if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here ret_value = false; break; + case Op_AbsVB: + case Op_AbsVS: + case Op_AbsVI: case Op_AddReductionVI: if (UseSSE < 3) // requires at least SSE3 ret_value = false; @@ -1447,9 +1456,19 @@ const bool Matcher::match_rule_supported(int opcode) { ret_value = false; break; case Op_MulAddVS2VI: + case Op_RShiftVL: + case Op_AbsVD: + case Op_NegVD: if (UseSSE < 2) ret_value = false; break; + case Op_MulVB: + case Op_LShiftVB: + case Op_RShiftVB: + case Op_URShiftVB: + if (UseSSE < 4) + ret_value = false; + break; #ifdef _LP64 case Op_MaxD: case Op_MaxF: @@ -1470,24 +1489,42 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { bool ret_value = match_rule_supported(opcode); if (ret_value) { switch (opcode) { + case Op_AbsVB: case Op_AddVB: case Op_SubVB: if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) ret_value = false; break; - case Op_URShiftVS: - case Op_RShiftVS: - case Op_LShiftVS: - case Op_MulVS: + case Op_AbsVS: case Op_AddVS: case Op_SubVS: + case Op_MulVS: + case Op_LShiftVS: + case Op_RShiftVS: + case Op_URShiftVS: if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) ret_value = false; break; + case Op_MulVB: + case Op_LShiftVB: + case Op_RShiftVB: + case Op_URShiftVB: + if ((vlen == 32 && UseAVX < 2) || + ((vlen == 64) && (VM_Version::supports_avx512bw() == false))) + ret_value = false; + break; + case Op_NegVF: + if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) + ret_value = false; + break; case Op_CMoveVF: if (vlen 
!= 8) ret_value = false; break; + case Op_NegVD: + if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) + ret_value = false; + break; case Op_CMoveVD: if (vlen != 4) ret_value = false; @@ -7302,6 +7339,186 @@ instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ // --------------------------------- MUL -------------------------------------- +// Byte vector mul +instruct mul4B_reg(vecS dst, vecS src1, vecS src2, vecS tmp, rRegI scratch) %{ + predicate(UseSSE > 3 && n->as_Vector()->length() == 4); + match(Set dst (MulVB src1 src2)); + effect(TEMP dst, TEMP tmp, TEMP scratch); + format %{"pmovsxbw $tmp,$src1\n\t" + "pmovsxbw $dst,$src2\n\t" + "pmullw $tmp,$dst\n\t" + "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" + "pand $dst,$tmp\n\t" + "packuswb $dst,$dst\t! mul packed4B" %} + ins_encode %{ + __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); + __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); + __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ pand($dst$$XMMRegister, $tmp$$XMMRegister); + __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul8B_reg(vecD dst, vecD src1, vecD src2, vecD tmp, rRegI scratch) %{ + predicate(UseSSE > 3 && n->as_Vector()->length() == 8); + match(Set dst (MulVB src1 src2)); + effect(TEMP dst, TEMP tmp, TEMP scratch); + format %{"pmovsxbw $tmp,$src1\n\t" + "pmovsxbw $dst,$src2\n\t" + "pmullw $tmp,$dst\n\t" + "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" + "pand $dst,$tmp\n\t" + "packuswb $dst,$dst\t! 
mul packed8B" %} + ins_encode %{ + __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); + __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); + __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ pand($dst$$XMMRegister, $tmp$$XMMRegister); + __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul16B_reg(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2, rRegI scratch) %{ + predicate(UseSSE > 3 && n->as_Vector()->length() == 16); + match(Set dst (MulVB src1 src2)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); + format %{"pmovsxbw $tmp1,$src1\n\t" + "pmovsxbw $tmp2,$src2\n\t" + "pmullw $tmp1,$tmp2\n\t" + "pshufd $tmp2,$src1,0xEE\n\t" + "pshufd $dst,$src2,0xEE\n\t" + "pmovsxbw $tmp2,$tmp2\n\t" + "pmovsxbw $dst,$dst\n\t" + "pmullw $tmp2,$dst\n\t" + "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" + "pand $tmp2,$dst\n\t" + "pand $dst,$tmp1\n\t" + "packuswb $dst,$tmp2\t! 
mul packed16B" %} + ins_encode %{ + __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); + __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); + __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister); + __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); + __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE); + __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); + __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); + __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); + __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul16B_reg_avx(vecX dst, vecX src1, vecX src2, vecX tmp, rRegI scratch) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (MulVB src1 src2)); + effect(TEMP dst, TEMP tmp, TEMP scratch); + format %{"vpmovsxbw $tmp,$src1\n\t" + "vpmovsxbw $dst,$src2\n\t" + "vpmullw $tmp,$tmp,$dst\n\t" + "vmovdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" + "vpand $dst,$dst,$tmp\n\t" + "vextracti128_high $tmp,$dst\n\t" + "vpackuswb $dst,$dst,$dst\n\t! 
mul packed16B" %} + ins_encode %{ + int vector_len = 1; + __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len); + __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); + __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); + __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul32B_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2, rRegI scratch) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 32); + match(Set dst (MulVB src1 src2)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); + format %{"vextracti128_high $tmp1,$src1\n\t" + "vextracti128_high $dst,$src2\n\t" + "vpmovsxbw $tmp1,$tmp1\n\t" + "vpmovsxbw $dst,$dst\n\t" + "vpmullw $tmp1,$tmp1,$dst\n\t" + "vpmovsxbw $tmp2,$src1\n\t" + "vpmovsxbw $dst,$src2\n\t" + "vpmullw $tmp2,$tmp2,$dst\n\t" + "vmovdqu $dst, [0x00ff00ff0x00ff00ff]\n\t" + "vpbroadcastd $dst, $dst\n\t" + "vpand $tmp1,$tmp1,$dst\n\t" + "vpand $dst,$dst,$tmp2\n\t" + "vpackuswb $dst,$dst,$tmp1\n\t" + "vpermq $dst, $dst, 0xD8\t! 
mul packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); + __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister); + __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); + __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); + __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); + __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len); + __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul64B_reg_avx(vecZ dst, vecZ src1, vecZ src2, vecZ tmp1, vecZ tmp2, rRegI scratch) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 64); + match(Set dst (MulVB src1 src2)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); + format %{"vextracti64x4_high $tmp1,$src1\n\t" + "vextracti64x4_high $dst,$src2\n\t" + "vpmovsxbw $tmp1,$tmp1\n\t" + "vpmovsxbw $dst,$dst\n\t" + "vpmullw $tmp1,$tmp1,$dst\n\t" + "vpmovsxbw $tmp2,$src1\n\t" + "vpmovsxbw $dst,$src2\n\t" + "vpmullw $tmp2,$tmp2,$dst\n\t" + "vmovdqu $dst, [0x00ff00ff0x00ff00ff]\n\t" + "vpbroadcastd $dst, $dst\n\t" + "vpand $tmp1,$tmp1,$dst\n\t" + "vpand $tmp2,$tmp2,$dst\n\t" + "vpackuswb $dst,$tmp1,$tmp2\n\t" + "evmovdquq $tmp2,[0x0604020007050301]\n\t" + "vpermq $dst,$tmp2,$dst,0x01\t! 
mul packed64B" %} + + ins_encode %{ + int vector_len = 2; + __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); + __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister); + __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); + __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); + __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); + __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len); + __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); + __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); + + %} + ins_pipe( pipe_slow ); +%} + // Shorts/Chars vector mul instruct vmul2S(vecS dst, vecS src) %{ predicate(UseAVX == 0 && n->as_Vector()->length() == 2); @@ -8024,20 +8241,6 @@ instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ ins_pipe( pipe_slow ); %} -// ------------------------------ Shift --------------------------------------- - -// Left and right shift count vectors are the same on x86 -// (only lowest bits of xmm reg are used for count). -instruct vshiftcnt(vecS dst, rRegI cnt) %{ - match(Set dst (LShiftCntV cnt)); - match(Set dst (RShiftCntV cnt)); - format %{ "movd $dst,$cnt\t! 
load shift count" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $cnt$$Register); - %} - ins_pipe( pipe_slow ); -%} - // --------------------------------- Sqrt -------------------------------------- // Floating point vector sqrt @@ -8195,1093 +8398,479 @@ instruct vsqrt16F_mem(vecZ dst, memory mem) %{ ins_pipe( pipe_slow ); %} -// ------------------------------ LeftShift ----------------------------------- +// ------------------------------ Shift --------------------------------------- -// Shorts/Chars vector left shift -instruct vsll2S(vecS dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (LShiftVS dst shift)); - format %{ "psllw $dst,$shift\t! left shift packed2S" %} +// Left and right shift count vectors are the same on x86 +// (only lowest bits of xmm reg are used for count). +instruct vshiftcnt(vecS dst, rRegI cnt) %{ + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "movdl $dst,$cnt\t! load shift count" %} ins_encode %{ - __ psllw($dst$$XMMRegister, $shift$$XMMRegister); + __ movdl($dst$$XMMRegister, $cnt$$Register); %} ins_pipe( pipe_slow ); %} -instruct vsll2S_imm(vecS dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (LShiftVS dst shift)); - format %{ "psllw $dst,$shift\t! left shift packed2S" %} +instruct vshiftcntimm(vecS dst, immI8 cnt, rRegI tmp) %{ + match(Set dst cnt); + effect(TEMP tmp); + format %{ "movl $tmp,$cnt\t" + "movdl $dst,$tmp\t! load shift count" %} ins_encode %{ - __ psllw($dst$$XMMRegister, (int)$shift$$constant); + __ movl($tmp$$Register, $cnt$$constant); + __ movdl($dst$$XMMRegister, $tmp$$Register); %} ins_pipe( pipe_slow ); %} -instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed2S" %} +// Byte vector shift +instruct vshift4B(vecS dst, vecS src, vecS shift, vecS tmp, rRegI scratch) %{ + predicate(UseSSE > 3 && n->as_Vector()->length() == 4); + match(Set dst (LShiftVB src shift)); + match(Set dst (RShiftVB src shift)); + match(Set dst (URShiftVB src shift)); + effect(TEMP dst, TEMP tmp, TEMP scratch); + format %{"vextendbw $tmp,$src\n\t" + "vshiftw $tmp,$shift\n\t" + "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" + "pand $dst,$tmp\n\t" + "packuswb $dst,$dst\n\t ! packed4B shift" %} ins_encode %{ - int vector_len = 0; - __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + int opcode = this->as_Mach()->ideal_Opcode(); + + __ vextendbw(opcode != Op_URShiftVB, $tmp$$XMMRegister, $src$$XMMRegister); // sign-extend, except zero-extend for URShiftVB + __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ pand($dst$$XMMRegister, $tmp$$XMMRegister); + __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} +instruct vshift8B(vecD dst, vecD src, vecS shift, vecD tmp, rRegI scratch) %{ + predicate(UseSSE > 3 && n->as_Vector()->length() == 8); + match(Set dst (LShiftVB src shift)); + match(Set dst (RShiftVB src shift)); + match(Set dst (URShiftVB src shift)); + effect(TEMP dst, TEMP tmp, TEMP scratch); + format %{"vextendbw $tmp,$src\n\t" + "vshiftw $tmp,$shift\n\t" + "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" + "pand $dst,$tmp\n\t" + "packuswb $dst,$dst\n\t ! 
packed8B shift" %} ins_encode %{ - int vector_len = 0; - __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + int opcode = this->as_Mach()->ideal_Opcode(); + + __ vextendbw(opcode != Op_URShiftVB, $tmp$$XMMRegister, $src$$XMMRegister); // sign-extend, except zero-extend for URShiftVB + __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ pand($dst$$XMMRegister, $tmp$$XMMRegister); + __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vsll4S(vecD dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 4); - match(Set dst (LShiftVS dst shift)); - format %{ "psllw $dst,$shift\t! left shift packed4S" %} +instruct vshift16B(vecX dst, vecX src, vecS shift, vecX tmp1, vecX tmp2, rRegI scratch) %{ + predicate(UseSSE > 3 && UseAVX <= 1 && n->as_Vector()->length() == 16); + match(Set dst (LShiftVB src shift)); + match(Set dst (RShiftVB src shift)); + match(Set dst (URShiftVB src shift)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); + format %{"vextendbw $tmp1,$src\n\t" + "vshiftw $tmp1,$shift\n\t" + "pshufd $tmp2,$src\n\t" + "vextendbw $tmp2,$tmp2\n\t" + "vshiftw $tmp2,$shift\n\t" + "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" + "pand $tmp2,$dst\n\t" + "pand $dst,$tmp1\n\t" + "packuswb $dst,$tmp2\n\t! 
packed16B shift" %} ins_encode %{ - __ psllw($dst$$XMMRegister, $shift$$XMMRegister); + int opcode = this->as_Mach()->ideal_Opcode(); + + __ vextendbw(opcode != Op_URShiftVB, $tmp1$$XMMRegister, $src$$XMMRegister); // sign-extend, except zero-extend for URShiftVB + __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); + __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); + __ vextendbw(opcode != Op_URShiftVB, $tmp2$$XMMRegister, $tmp2$$XMMRegister); + __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); + __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); + __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); + __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vsll4S_imm(vecD dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 4); - match(Set dst (LShiftVS dst shift)); - format %{ "psllw $dst,$shift\t! left shift packed4S" %} - ins_encode %{ - __ psllw($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} - ins_encode %{ - int vector_len = 0; - __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} - ins_encode %{ - int vector_len = 0; - __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll8S(vecX dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 8); - match(Set dst (LShiftVS dst shift)); - format %{ "psllw $dst,$shift\t! 
left shift packed8S" %} - ins_encode %{ - __ psllw($dst$$XMMRegister, $shift$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll8S_imm(vecX dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 8); - match(Set dst (LShiftVS dst shift)); - format %{ "psllw $dst,$shift\t! left shift packed8S" %} - ins_encode %{ - __ psllw($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} - ins_encode %{ - int vector_len = 0; - __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} - ins_encode %{ - int vector_len = 0; - __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ +instruct vshift16B_avx(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); - match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} + match(Set dst (LShiftVB src shift)); + match(Set dst (RShiftVB src shift)); + match(Set dst (URShiftVB src shift)); + effect(TEMP dst, TEMP tmp, TEMP scratch); + format %{"vextendbw $tmp,$src\n\t" + "vshiftw $tmp,$tmp,$shift\n\t" + "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" + "vextracti128_high $dst,$tmp\n\t" + "vpackuswb $dst,$tmp,$dst\n\t! 
packed16B shift" %} ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + int vector_len = 1; - __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + __ vextendbw(opcode != Op_URShiftVB, $tmp$$XMMRegister, $src$$XMMRegister, vector_len); // sign-extend, except zero-extend for URShiftVB + __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); + __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); + __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); + __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); %} ins_pipe( pipe_slow ); %} -instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); - match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} +instruct vshift32B_avx(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 32); + match(Set dst (LShiftVB src shift)); + match(Set dst (RShiftVB src shift)); + match(Set dst (URShiftVB src shift)); + effect(TEMP dst, TEMP tmp, TEMP scratch); + format %{"vextracti128_high $tmp,$src\n\t" + "vextendbw $tmp,$tmp\n\t" + "vextendbw $dst,$src\n\t" + "vshiftw $tmp,$tmp,$shift\n\t" + "vshiftw $dst,$dst,$shift\n\t" + "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" + "vpand $dst,$dst,[0x00ff00ff0x00ff00ff]\n\t" + "vpackuswb $dst,$dst,$tmp\n\t" + "vpermq $dst,$dst,0xD8\n\t! 
packed32B shift" %} ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + int vector_len = 1; - __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); + __ vextendbw(opcode != Op_URShiftVB, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); // sign-extend, except zero-extend for URShiftVB + __ vextendbw(opcode != Op_URShiftVB, $dst$$XMMRegister, $src$$XMMRegister, vector_len); + __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); + __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len); + __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); + __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); + __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); + __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ - predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); - match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed32S" %} +instruct vshift64B_avx(vecZ dst, vecZ src, vecS shift, vecZ tmp1, vecZ tmp2, rRegI scratch) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 64); + match(Set dst (LShiftVB src shift)); + match(Set dst (RShiftVB src shift)); + match(Set dst (URShiftVB src shift)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); + format %{"vextracti64x4 $tmp1,$src\n\t" + "vextendbw $tmp1,$tmp1\n\t" + "vextendbw $tmp2,$src\n\t" + "vshiftw $tmp1,$tmp1,$shift\n\t" + "vshiftw $tmp2,$tmp2,$shift\n\t" + "vmovdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" + "vpbroadcastd $dst,$dst\n\t" + "vpand $tmp1,$tmp1,$dst\n\t" + "vpand $tmp2,$tmp2,$dst\n\t" + "vpackuswb $dst,$tmp1,$tmp2\n\t" + "evmovdquq $tmp2, [0x0604020007050301]\n\t" + "vpermq $dst,$tmp2,$dst\n\t! packed64B shift" %} ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + int vector_len = 2; - __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); + __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); + __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len); + __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len); + __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len); + __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); + __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); + __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len); + __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); + __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( 
pipe_slow ); %} -instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ - predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); - match(Set dst (LShiftVS src shift)); - format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} - ins_encode %{ - int vector_len = 2; - __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -// Integers vector left shift -instruct vsll2I(vecD dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (LShiftVI dst shift)); - format %{ "pslld $dst,$shift\t! left shift packed2I" %} - ins_encode %{ - __ pslld($dst$$XMMRegister, $shift$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll2I_imm(vecD dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (LShiftVI dst shift)); - format %{ "pslld $dst,$shift\t! left shift packed2I" %} - ins_encode %{ - __ pslld($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (LShiftVI src shift)); - format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} - ins_encode %{ - int vector_len = 0; - __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (LShiftVI src shift)); - format %{ "vpslld $dst,$src,$shift\t! 
left shift packed2I" %} - ins_encode %{ - int vector_len = 0; - __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll4I(vecX dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 4); - match(Set dst (LShiftVI dst shift)); - format %{ "pslld $dst,$shift\t! left shift packed4I" %} - ins_encode %{ - __ pslld($dst$$XMMRegister, $shift$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll4I_imm(vecX dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 4); - match(Set dst (LShiftVI dst shift)); - format %{ "pslld $dst,$shift\t! left shift packed4I" %} - ins_encode %{ - __ pslld($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (LShiftVI src shift)); - format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} - ins_encode %{ - int vector_len = 0; - __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (LShiftVI src shift)); - format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} - ins_encode %{ - int vector_len = 0; - __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 8); - match(Set dst (LShiftVI src shift)); - format %{ "vpslld $dst,$src,$shift\t! 
left shift packed8I" %} - ins_encode %{ - int vector_len = 1; - __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 8); - match(Set dst (LShiftVI src shift)); - format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} - ins_encode %{ - int vector_len = 1; - __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 16); - match(Set dst (LShiftVI src shift)); - format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} - ins_encode %{ - int vector_len = 2; - __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 16); - match(Set dst (LShiftVI src shift)); - format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} - ins_encode %{ - int vector_len = 2; - __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -// Longs vector left shift -instruct vsll2L(vecX dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (LShiftVL dst shift)); - format %{ "psllq $dst,$shift\t! left shift packed2L" %} - ins_encode %{ - __ psllq($dst$$XMMRegister, $shift$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll2L_imm(vecX dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (LShiftVL dst shift)); - format %{ "psllq $dst,$shift\t! 
left shift packed2L" %} - ins_encode %{ - __ psllq($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (LShiftVL src shift)); - format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} - ins_encode %{ - int vector_len = 0; - __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (LShiftVL src shift)); - format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} - ins_encode %{ - int vector_len = 0; - __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 4); - match(Set dst (LShiftVL src shift)); - format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} - ins_encode %{ - int vector_len = 1; - __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 4); - match(Set dst (LShiftVL src shift)); - format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} - ins_encode %{ - int vector_len = 1; - __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 8); - match(Set dst (LShiftVL src shift)); - format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed8L" %} - ins_encode %{ - int vector_len = 2; - __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 8); - match(Set dst (LShiftVL src shift)); - format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} - ins_encode %{ - int vector_len = 2; - __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -// ----------------------- LogicalRightShift ----------------------------------- - // Shorts vector logical right shift produces incorrect Java result // for negative data because java code convert short value into int with // sign extension before a shift. But char vectors are fine since chars are // unsigned values. - -instruct vsrl2S(vecS dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (URShiftVS dst shift)); - format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} - ins_encode %{ - __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl2S_imm(vecS dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (URShiftVS dst shift)); - format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} - ins_encode %{ - __ psrlw($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +// Shorts/Chars vector shift (left, arithmetic right and logical right) +instruct vshist2S(vecS dst, vecS src, vecS shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVS src shift)); + match(Set dst (RShiftVS src shift)); match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + format %{ "vshiftw $dst,$src,$shift\t! 
shift packed2S" %} ins_encode %{ - int vector_len = 0; - __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + int opcode = this->as_Mach()->ideal_Opcode(); + if (UseAVX == 0) { + if ($dst$$XMMRegister != $src$$XMMRegister) + __ movflt($dst$$XMMRegister, $src$$XMMRegister); + __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); + } else { + int vector_len = 0; + __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + } %} ins_pipe( pipe_slow ); %} -instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vshift4S(vecD dst, vecD src, vecS shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + match(Set dst (RShiftVS src shift)); match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + format %{ "vshiftw $dst,$src,$shift\t! shift packed4S" %} ins_encode %{ - int vector_len = 0; - __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + int opcode = this->as_Mach()->ideal_Opcode(); + if (UseAVX == 0) { + if ($dst$$XMMRegister != $src$$XMMRegister) + __ movdbl($dst$$XMMRegister, $src$$XMMRegister); + __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); + + } else { + int vector_len = 0; + __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + } %} ins_pipe( pipe_slow ); %} -instruct vsrl4S(vecD dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 4); - match(Set dst (URShiftVS dst shift)); - format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} - ins_encode %{ - __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl4S_imm(vecD dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 4); - match(Set dst (URShiftVS dst shift)); - format %{ "psrlw $dst,$shift\t! 
logical right shift packed4S" %} - ins_encode %{ - __ psrlw($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); +instruct vshift8S(vecX dst, vecX src, vecS shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + match(Set dst (RShiftVS src shift)); match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} + format %{ "vshiftw $dst,$src,$shift\t! shift packed8S" %} ins_encode %{ - int vector_len = 0; - __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + int opcode = this->as_Mach()->ideal_Opcode(); + if (UseAVX == 0) { + if ($dst$$XMMRegister != $src$$XMMRegister) + __ movdqu($dst$$XMMRegister, $src$$XMMRegister); + __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); + } else { + int vector_len = 0; + __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + } %} ins_pipe( pipe_slow ); %} -instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} - ins_encode %{ - int vector_len = 0; - __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl8S(vecX dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 8); - match(Set dst (URShiftVS dst shift)); - format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} - ins_encode %{ - __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl8S_imm(vecX dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 8); - match(Set dst (URShiftVS dst shift)); - format %{ "psrlw $dst,$shift\t! 
logical right shift packed8S" %} - ins_encode %{ - __ psrlw($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} - ins_encode %{ - int vector_len = 0; - __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} - ins_encode %{ - int vector_len = 0; - __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ +instruct vshift16S(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (LShiftVS src shift)); + match(Set dst (RShiftVS src shift)); match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} + format %{ "vshiftw $dst,$src,$shift\t! shift packed16S" %} ins_encode %{ int vector_len = 1; - __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + int opcode = this->as_Mach()->ideal_Opcode(); + __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); - match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %} - ins_encode %{ - int vector_len = 1; - __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ +instruct vshift32S(vecZ dst, vecZ src, vecS shift) %{ predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); + match(Set dst (LShiftVS src shift)); + match(Set dst (RShiftVS src shift)); match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} + format %{ "vshiftw $dst,$src,$shift\t! shift packed32S" %} ins_encode %{ int vector_len = 2; - __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + int opcode = this->as_Mach()->ideal_Opcode(); + __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ - predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); - match(Set dst (URShiftVS src shift)); - format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} - ins_encode %{ - int vector_len = 2; - __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -// Integers vector logical right shift -instruct vsrl2I(vecD dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (URShiftVI dst shift)); - format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} - ins_encode %{ - __ psrld($dst$$XMMRegister, $shift$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl2I_imm(vecD dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (URShiftVI dst shift)); - format %{ "psrld $dst,$shift\t! 
logical right shift packed2I" %} - ins_encode %{ - __ psrld($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +// Integers vector shift (left, arithmetic right and logical right) +instruct vshift2I(vecD dst, vecD src, vecS shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVI src shift)); + match(Set dst (RShiftVI src shift)); match(Set dst (URShiftVI src shift)); - format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} + format %{ "vshiftd $dst,$src,$shift\t! shift packed2I" %} ins_encode %{ - int vector_len = 0; - __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + int opcode = this->as_Mach()->ideal_Opcode(); + if (UseAVX == 0) { + if ($dst$$XMMRegister != $src$$XMMRegister) + __ movdbl($dst$$XMMRegister, $src$$XMMRegister); + __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); + } else { + int vector_len = 0; + __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + } %} ins_pipe( pipe_slow ); %} -instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); +instruct vshift4I(vecX dst, vecX src, vecS shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVI src shift)); + match(Set dst (RShiftVI src shift)); match(Set dst (URShiftVI src shift)); - format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} + format %{ "vshiftd $dst,$src,$shift\t! 
shift packed4I" %} ins_encode %{ - int vector_len = 0; - __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + int opcode = this->as_Mach()->ideal_Opcode(); + if (UseAVX == 0) { + if ($dst$$XMMRegister != $src$$XMMRegister) + __ movdqu($dst$$XMMRegister, $src$$XMMRegister); + __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); + } else { + int vector_len = 0; + __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + } %} ins_pipe( pipe_slow ); %} -instruct vsrl4I(vecX dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 4); - match(Set dst (URShiftVI dst shift)); - format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} - ins_encode %{ - __ psrld($dst$$XMMRegister, $shift$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl4I_imm(vecX dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 4); - match(Set dst (URShiftVI dst shift)); - format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} - ins_encode %{ - __ psrld($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (URShiftVI src shift)); - format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} - ins_encode %{ - int vector_len = 0; - __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (URShiftVI src shift)); - format %{ "vpsrld $dst,$src,$shift\t! 
logical right shift packed4I" %} - ins_encode %{ - int vector_len = 0; - __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ +instruct vshift8I(vecY dst, vecY src, vecS shift) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (LShiftVI src shift)); + match(Set dst (RShiftVI src shift)); match(Set dst (URShiftVI src shift)); - format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} + format %{ "vshiftd $dst,$src,$shift\t! shift packed8I" %} ins_encode %{ int vector_len = 1; - __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + int opcode = this->as_Mach()->ideal_Opcode(); + __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 8); +instruct vshift16I(vecZ dst, vecZ src, vecS shift) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 16); + match(Set dst (LShiftVI src shift)); + match(Set dst (RShiftVI src shift)); match(Set dst (URShiftVI src shift)); - format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} + format %{ "vshiftd $dst,$src,$shift\t! shift packed16I" %} + ins_encode %{ + int vector_len = 2; + int opcode = this->as_Mach()->ideal_Opcode(); + __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// Longs vector shift +instruct vshift2L(vecX dst, vecX src, vecS shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + match(Set dst (URShiftVL src shift)); + format %{ "vshiftq $dst,$src,$shift\t! 
shift packed2L" %} + ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + if (UseAVX == 0) { + if ($dst$$XMMRegister != $src$$XMMRegister) + __ movdqu($dst$$XMMRegister, $src$$XMMRegister); + __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); + } else { + int vector_len = 0; + __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct vshift4L(vecY dst, vecY src, vecS shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); + match(Set dst (LShiftVL src shift)); + match(Set dst (URShiftVL src shift)); + format %{ "vshiftq $dst,$src,$shift\t! shift packed4L" %} ins_encode %{ int vector_len = 1; - __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + int opcode = this->as_Mach()->ideal_Opcode(); + __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 16); - match(Set dst (URShiftVI src shift)); - format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} +instruct vshift8L(vecZ dst, vecZ src, vecS shift) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8); + match(Set dst (LShiftVL src shift)); + match(Set dst (RShiftVL src shift)); + match(Set dst (URShiftVL src shift)); + format %{ "vshiftq $dst,$src,$shift\t! shift packed8L" %} ins_encode %{ int vector_len = 2; - __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + int opcode = this->as_Mach()->ideal_Opcode(); + __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 16); - match(Set dst (URShiftVI src shift)); - format %{ "vpsrld $dst,$src,$shift\t! 
logical right shift packed16I" %} - ins_encode %{ - int vector_len = 2; - __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -// Longs vector logical right shift -instruct vsrl2L(vecX dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (URShiftVL dst shift)); - format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} +// -------------------ArithmeticRightShift ----------------------------------- +// Long vector arithmetic right shift +instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{ + predicate(UseSSE >= 2 && n->as_Vector()->length() == 2); + match(Set dst (RShiftVL src shift)); + effect(TEMP dst, TEMP tmp, TEMP scratch); + format %{ "movdqu $dst,$src\n\t" + "psrlq $dst,$shift\n\t" + "movdqu $tmp,[0x8000000000000000]\n\t" + "psrlq $tmp,$shift\n\t" + "pxor $dst,$tmp\n\t" + "psubq $dst,$tmp\t! arithmetic right shift packed2L" %} ins_encode %{ + __ movdqu($dst$$XMMRegister, $src$$XMMRegister); __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); + __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); + __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); + __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); + __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct vsrl2L_imm(vecX dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (URShiftVL dst shift)); - format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} - ins_encode %{ - __ psrlq($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (URShiftVL src shift)); - format %{ "vpsrlq $dst,$src,$shift\t! 
logical right shift packed2L" %} +instruct vsra2L_reg_evex(vecX dst, vecX src, vecS shift) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 2); + match(Set dst (RShiftVL src shift)); + format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %} ins_encode %{ int vector_len = 0; - __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (URShiftVL src shift)); - format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} - ins_encode %{ - int vector_len = 0; - __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ +instruct vsra4L_reg(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 4); - match(Set dst (URShiftVL src shift)); - format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} + match(Set dst (RShiftVL src shift)); + effect(TEMP dst, TEMP tmp, TEMP scratch); + format %{ "vpsrlq $dst,$src,$shift\n\t" + "vmovdqu $tmp,[0x8000000000000000]\n\t" + "vpsrlq $tmp,$tmp,$shift\n\t" + "vpxor $dst,$dst,$tmp\n\t" + "vpsubq $dst,$dst,$tmp\t! 
arithmetic right shift packed4L" %} ins_encode %{ int vector_len = 1; __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); + __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); + __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); + __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); + __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 4); - match(Set dst (URShiftVL src shift)); - format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} +instruct vsra4L_reg_evex(vecY dst, vecY src, vecS shift) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 4); + match(Set dst (RShiftVL src shift)); + format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed4L" %} ins_encode %{ int vector_len = 1; - __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); + __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} -instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 8); - match(Set dst (URShiftVL src shift)); - format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} - ins_encode %{ - int vector_len = 2; - __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 8); - match(Set dst (URShiftVL src shift)); - format %{ "vpsrlq $dst,$src,$shift\t! 
logical right shift packed8L" %} - ins_encode %{ - int vector_len = 2; - __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -// ------------------- ArithmeticRightShift ----------------------------------- - -// Shorts/Chars vector arithmetic right shift -instruct vsra2S(vecS dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (RShiftVS dst shift)); - format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} - ins_encode %{ - __ psraw($dst$$XMMRegister, $shift$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra2S_imm(vecS dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (RShiftVS dst shift)); - format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} - ins_encode %{ - __ psraw($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (RShiftVS src shift)); - format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} - ins_encode %{ - int vector_len = 0; - __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (RShiftVS src shift)); - format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} - ins_encode %{ - int vector_len = 0; - __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra4S(vecD dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 4); - match(Set dst (RShiftVS dst shift)); - format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed4S" %} - ins_encode %{ - __ psraw($dst$$XMMRegister, $shift$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra4S_imm(vecD dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 4); - match(Set dst (RShiftVS dst shift)); - format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} - ins_encode %{ - __ psraw($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (RShiftVS src shift)); - format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} - ins_encode %{ - int vector_len = 0; - __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (RShiftVS src shift)); - format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} - ins_encode %{ - int vector_len = 0; - __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra8S(vecX dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 8); - match(Set dst (RShiftVS dst shift)); - format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} - ins_encode %{ - __ psraw($dst$$XMMRegister, $shift$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra8S_imm(vecX dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 8); - match(Set dst (RShiftVS dst shift)); - format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed8S" %} - ins_encode %{ - __ psraw($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (RShiftVS src shift)); - format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} - ins_encode %{ - int vector_len = 0; - __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 8); - match(Set dst (RShiftVS src shift)); - format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} - ins_encode %{ - int vector_len = 0; - __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); - match(Set dst (RShiftVS src shift)); - format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} - ins_encode %{ - int vector_len = 1; - __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 16); - match(Set dst (RShiftVS src shift)); - format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} - ins_encode %{ - int vector_len = 1; - __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ - predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); - match(Set dst (RShiftVS src shift)); - format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed32S" %} - ins_encode %{ - int vector_len = 2; - __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ - predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); - match(Set dst (RShiftVS src shift)); - format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} - ins_encode %{ - int vector_len = 2; - __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -// Integers vector arithmetic right shift -instruct vsra2I(vecD dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (RShiftVI dst shift)); - format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} - ins_encode %{ - __ psrad($dst$$XMMRegister, $shift$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra2I_imm(vecD dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 2); - match(Set dst (RShiftVI dst shift)); - format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} - ins_encode %{ - __ psrad($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (RShiftVI src shift)); - format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} - ins_encode %{ - int vector_len = 0; - __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 2); - match(Set dst (RShiftVI src shift)); - format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed2I" %} - ins_encode %{ - int vector_len = 0; - __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra4I(vecX dst, vecS shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 4); - match(Set dst (RShiftVI dst shift)); - format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} - ins_encode %{ - __ psrad($dst$$XMMRegister, $shift$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra4I_imm(vecX dst, immI8 shift) %{ - predicate(UseAVX == 0 && n->as_Vector()->length() == 4); - match(Set dst (RShiftVI dst shift)); - format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} - ins_encode %{ - __ psrad($dst$$XMMRegister, (int)$shift$$constant); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (RShiftVI src shift)); - format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} - ins_encode %{ - int vector_len = 0; - __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (RShiftVI src shift)); - format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} - ins_encode %{ - int vector_len = 0; - __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 8); - match(Set dst (RShiftVI src shift)); - format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed8I" %} - ins_encode %{ - int vector_len = 1; - __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 8); - match(Set dst (RShiftVI src shift)); - format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} - ins_encode %{ - int vector_len = 1; - __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 16); - match(Set dst (RShiftVI src shift)); - format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} - ins_encode %{ - int vector_len = 2; - __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 16); - match(Set dst (RShiftVI src shift)); - format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} - ins_encode %{ - int vector_len = 2; - __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -// There are no longs vector arithmetic right shift instructions. 
- - // --------------------------------- AND -------------------------------------- instruct vand4B(vecS dst, vecS src) %{ @@ -9708,6 +9297,291 @@ instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{ ins_pipe( pipe_slow ); %} +// --------------------------------- ABS -------------------------------------- +// a = |a| +instruct vabs4B_reg(vecS dst, vecS src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 4); + match(Set dst (AbsVB src)); + format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed4B" %} + ins_encode %{ + __ pabsb($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs8B_reg(vecD dst, vecD src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 8); + match(Set dst (AbsVB src)); + format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed8B" %} + ins_encode %{ + __ pabsb($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs16B_reg(vecX dst, vecX src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 16); + match(Set dst (AbsVB src)); + format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed16B" %} + ins_encode %{ + __ pabsb($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs32B_reg(vecY dst, vecY src) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 32); + match(Set dst (AbsVB src)); + format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed32B" %} + ins_encode %{ + int vector_len = 1; + __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs64B_reg(vecZ dst, vecZ src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 64); + match(Set dst (AbsVB src)); + format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed64B" %} + ins_encode %{ + int vector_len = 2; + __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs2S_reg(vecD dst, vecD src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 2); + 
match(Set dst (AbsVS src)); + format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed2S" %} + ins_encode %{ + __ pabsw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs4S_reg(vecD dst, vecD src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 4); + match(Set dst (AbsVS src)); + format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed4S" %} + ins_encode %{ + __ pabsw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs8S_reg(vecX dst, vecX src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 8); + match(Set dst (AbsVS src)); + format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed8S" %} + ins_encode %{ + __ pabsw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs16S_reg(vecY dst, vecY src) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (AbsVS src)); + format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed16S" %} + ins_encode %{ + int vector_len = 1; + __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs32S_reg(vecZ dst, vecZ src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 32); + match(Set dst (AbsVS src)); + format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed32S" %} + ins_encode %{ + int vector_len = 2; + __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs2I_reg(vecD dst, vecD src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 2); + match(Set dst (AbsVI src)); + format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed2I" %} + ins_encode %{ + __ pabsd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs4I_reg(vecX dst, vecX src) %{ + predicate(UseSSE > 2 && n->as_Vector()->length() == 4); + match(Set dst (AbsVI src)); + format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed4I" %} + ins_encode %{ + __ pabsd($dst$$XMMRegister, 
$src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs8I_reg(vecY dst, vecY src) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (AbsVI src)); + format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed8I" %} + ins_encode %{ + int vector_len = 1; + __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs16I_reg(vecZ dst, vecZ src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 16); + match(Set dst (AbsVI src)); + format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed16I" %} + ins_encode %{ + int vector_len = 2; + __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs2L_reg(vecX dst, vecX src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 2); + match(Set dst (AbsVL src)); + format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed2L" %} + ins_encode %{ + int vector_len = 0; + __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs4L_reg(vecY dst, vecY src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 4); + match(Set dst (AbsVL src)); + format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed4L" %} + ins_encode %{ + int vector_len = 1; + __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabs8L_reg(vecZ dst, vecZ src) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8); + match(Set dst (AbsVL src)); + format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed8L" %} + ins_encode %{ + int vector_len = 2; + __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- ABSNEG -------------------------------------- + +instruct vabsneg2D(vecX dst, vecX src, rRegI scratch) %{ + predicate(UseSSE >= 2 && n->as_Vector()->length() == 2); + match(Set dst (AbsVD src)); + match(Set dst (NegVD src)); + 
effect(TEMP scratch); + format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed2D" %} + ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + if ($dst$$XMMRegister != $src$$XMMRegister) + __ movdqu($dst$$XMMRegister, $src$$XMMRegister); + __ vabsnegd(opcode, $dst$$XMMRegister, $scratch$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabsneg4D(vecY dst, vecY src, rRegI scratch) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AbsVD src)); + match(Set dst (NegVD src)); + effect(TEMP scratch); + format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed4D" %} + ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + int vector_len = 1; + __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabsneg8D(vecZ dst, vecZ src, rRegI scratch) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8); + match(Set dst (AbsVD src)); + match(Set dst (NegVD src)); + effect(TEMP scratch); + format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed8D" %} + ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + int vector_len = 2; + __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabsneg2F(vecD dst, vecD src, rRegI scratch) %{ + predicate(UseSSE > 0 && n->as_Vector()->length() == 2); + match(Set dst (AbsVF src)); + match(Set dst (NegVF src)); + effect(TEMP scratch); + format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed2F" %} + ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + if ($dst$$XMMRegister != $src$$XMMRegister) + __ movdqu($dst$$XMMRegister, $src$$XMMRegister); + __ vabsnegf(opcode, $dst$$XMMRegister, $scratch$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabsneg4F(vecX dst, rRegI scratch) %{ + predicate(UseSSE > 0 && n->as_Vector()->length() == 4); + match(Set dst (AbsVF dst)); + match(Set dst (NegVF dst)); + effect(TEMP 
scratch); + format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} + ins_cost(150); + ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + __ vabsnegf(opcode, $dst$$XMMRegister, $scratch$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabsneg8F(vecY dst, vecY src, rRegI scratch) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (AbsVF src)); + match(Set dst (NegVF src)); + effect(TEMP scratch); + format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed8F" %} + ins_cost(150); + ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + int vector_len = 1; + __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct vabsneg16F(vecZ dst, vecZ src, rRegI scratch) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 16); + match(Set dst (AbsVF src)); + match(Set dst (NegVF src)); + effect(TEMP scratch); + format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed16F" %} + ins_cost(150); + ins_encode %{ + int opcode = this->as_Mach()->ideal_Opcode(); + int vector_len = 2; + __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register); + %} + ins_pipe( pipe_slow ); +%} + // --------------------------------- FMA -------------------------------------- // a * b + c diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad index ef5125499ff..a59ac197ae1 100644 --- a/src/hotspot/cpu/x86/x86_32.ad +++ b/src/hotspot/cpu/x86/x86_32.ad @@ -8949,6 +8949,28 @@ instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) ins_pipe(ialu_reg_reg_alu0); %} +// Integer Absolute Instructions +instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr) +%{ + match(Set dst (AbsI src)); + effect(TEMP dst, TEMP tmp, KILL cr); + format %{ "movl $tmp, $src\n\t" + "sarl $tmp, 31\n\t" + "movl $dst, $src\n\t" + "xorl $dst, $tmp\n\t" + "subl $dst, $tmp\n" + %} + ins_encode %{ + __ movl($tmp$$Register, 
$src$$Register); + __ sarl($tmp$$Register, 31); + __ movl($dst$$Register, $src$$Register); + __ xorl($dst$$Register, $tmp$$Register); + __ subl($dst$$Register, $tmp$$Register); + %} + + ins_pipe(ialu_reg_reg); +%} + //----------Long Instructions------------------------------------------------ // Add Long Register with Register instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 81c55618cb0..a1c4b492abd 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -8181,6 +8181,52 @@ instruct xchgN( memory mem, rRegN newval) %{ ins_pipe( pipe_cmpxchg ); %} +//----------Abs Instructions------------------------------------------- + +// Integer Absolute Instructions +instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, rFlagsReg cr) +%{ + match(Set dst (AbsI src)); + effect(TEMP dst, TEMP tmp, KILL cr); + format %{ "movl $tmp, $src\n\t" + "sarl $tmp, 31\n\t" + "movl $dst, $src\n\t" + "xorl $dst, $tmp\n\t" + "subl $dst, $tmp\n" + %} + ins_encode %{ + __ movl($tmp$$Register, $src$$Register); + __ sarl($tmp$$Register, 31); + __ movl($dst$$Register, $src$$Register); + __ xorl($dst$$Register, $tmp$$Register); + __ subl($dst$$Register, $tmp$$Register); + %} + + ins_pipe(ialu_reg_reg); +%} + +// Long Absolute Instructions +instruct absL_rReg(rRegL dst, rRegL src, rRegL tmp, rFlagsReg cr) +%{ + match(Set dst (AbsL src)); + effect(TEMP dst, TEMP tmp, KILL cr); + format %{ "movq $tmp, $src\n\t" + "sarq $tmp, 63\n\t" + "movq $dst, $src\n\t" + "xorq $dst, $tmp\n\t" + "subq $dst, $tmp\n" + %} + ins_encode %{ + __ movq($tmp$$Register, $src$$Register); + __ sarq($tmp$$Register, 63); + __ movq($dst$$Register, $src$$Register); + __ xorq($dst$$Register, $tmp$$Register); + __ subq($dst$$Register, $tmp$$Register); + %} + + ins_pipe(ialu_reg_reg); +%} + //----------Subtraction Instructions------------------------------------------- // Integer Subtraction Instructions diff --git 
a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp index d75c5aa2b95..76129fb0fe7 100644 --- a/src/hotspot/share/adlc/formssel.cpp +++ b/src/hotspot/share/adlc/formssel.cpp @@ -3808,7 +3808,7 @@ void MatchNode::count_commutative_op(int& count) { "MaxI","MinI","MaxF","MinF","MaxD","MinD", "MaxV", "MinV", "MulI","MulL","MulF","MulD", - "MulVS","MulVI","MulVL","MulVF","MulVD", + "MulVB","MulVS","MulVI","MulVL","MulVF","MulVD", "OrI","OrL", "OrV", "XorI","XorL", @@ -4175,10 +4175,10 @@ bool MatchRule::is_vector() const { static const char *vector_list[] = { "AddVB","AddVS","AddVI","AddVL","AddVF","AddVD", "SubVB","SubVS","SubVI","SubVL","SubVF","SubVD", - "MulVS","MulVI","MulVL","MulVF","MulVD", + "MulVB","MulVS","MulVI","MulVL","MulVF","MulVD", "CMoveVD", "CMoveVF", "DivVF","DivVD", - "AbsVF","AbsVD", + "AbsVB","AbsVS","AbsVI","AbsVL","AbsVF","AbsVD", "NegVF","NegVD", "SqrtVD","SqrtVF", "AndV" ,"XorV" ,"OrV", diff --git a/src/hotspot/share/classfile/vmSymbols.cpp b/src/hotspot/share/classfile/vmSymbols.cpp index aa2bbe406c8..26b19297ef9 100644 --- a/src/hotspot/share/classfile/vmSymbols.cpp +++ b/src/hotspot/share/classfile/vmSymbols.cpp @@ -363,6 +363,9 @@ bool vmIntrinsics::preserves_state(vmIntrinsics::ID id) { case vmIntrinsics::_isInstance: case vmIntrinsics::_currentThread: case vmIntrinsics::_dabs: + case vmIntrinsics::_fabs: + case vmIntrinsics::_iabs: + case vmIntrinsics::_labs: case vmIntrinsics::_dsqrt: case vmIntrinsics::_dsin: case vmIntrinsics::_dcos: @@ -404,6 +407,9 @@ bool vmIntrinsics::can_trap(vmIntrinsics::ID id) { case vmIntrinsics::_longBitsToDouble: case vmIntrinsics::_currentThread: case vmIntrinsics::_dabs: + case vmIntrinsics::_fabs: + case vmIntrinsics::_iabs: + case vmIntrinsics::_labs: case vmIntrinsics::_dsqrt: case vmIntrinsics::_dsin: case vmIntrinsics::_dcos: @@ -567,6 +573,9 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) { case vmIntrinsics::_doubleToRawLongBits: case 
vmIntrinsics::_longBitsToDouble: case vmIntrinsics::_dabs: + case vmIntrinsics::_fabs: + case vmIntrinsics::_iabs: + case vmIntrinsics::_labs: case vmIntrinsics::_dsqrt: case vmIntrinsics::_dsin: case vmIntrinsics::_dcos: diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp index 77b978d1850..d997070cc9f 100644 --- a/src/hotspot/share/classfile/vmSymbols.hpp +++ b/src/hotspot/share/classfile/vmSymbols.hpp @@ -472,6 +472,7 @@ template(float_int_signature, "(F)I") \ template(double_long_signature, "(D)J") \ template(double_double_signature, "(D)D") \ + template(float_float_signature, "(F)F") \ template(int_float_signature, "(I)F") \ template(long_int_signature, "(J)I") \ template(long_long_signature, "(J)J") \ @@ -771,6 +772,9 @@ do_name(fma_name, "fma") \ \ do_intrinsic(_dabs, java_lang_Math, abs_name, double_double_signature, F_S) \ + do_intrinsic(_fabs, java_lang_Math, abs_name, float_float_signature, F_S) \ + do_intrinsic(_iabs, java_lang_Math, abs_name, int_int_signature, F_S) \ + do_intrinsic(_labs, java_lang_Math, abs_name, long_long_signature, F_S) \ do_intrinsic(_dsin, java_lang_Math, sin_name, double_double_signature, F_S) \ do_intrinsic(_dcos, java_lang_Math, cos_name, double_double_signature, F_S) \ do_intrinsic(_dtan, java_lang_Math, tan_name, double_double_signature, F_S) \ diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp index 21cead68bc2..e20efaeae61 100644 --- a/src/hotspot/share/opto/c2compiler.cpp +++ b/src/hotspot/share/opto/c2compiler.cpp @@ -460,6 +460,9 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt case vmIntrinsics::_dcos: case vmIntrinsics::_dtan: case vmIntrinsics::_dabs: + case vmIntrinsics::_fabs: + case vmIntrinsics::_iabs: + case vmIntrinsics::_labs: case vmIntrinsics::_datan2: case vmIntrinsics::_dsqrt: case vmIntrinsics::_dexp: diff --git a/src/hotspot/share/opto/classes.hpp b/src/hotspot/share/opto/classes.hpp 
index bb6565e7017..b0b537a59de 100644 --- a/src/hotspot/share/opto/classes.hpp +++ b/src/hotspot/share/opto/classes.hpp @@ -30,6 +30,7 @@ macro(AbsD) macro(AbsF) macro(AbsI) +macro(AbsL) macro(AddD) macro(AddF) macro(AddI) @@ -335,6 +336,7 @@ macro(SubVI) macro(SubVL) macro(SubVF) macro(SubVD) +macro(MulVB) macro(MulVS) macro(MulVI) macro(MulReductionVI) @@ -349,6 +351,10 @@ macro(FmaVD) macro(FmaVF) macro(DivVF) macro(DivVD) +macro(AbsVB) +macro(AbsVS) +macro(AbsVI) +macro(AbsVL) macro(AbsVF) macro(AbsVD) macro(NegVF) diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index f8c5cededc1..6075a5746bd 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -227,6 +227,7 @@ class LibraryCallKit : public GraphKit { bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName); bool inline_math_native(vmIntrinsics::ID id); bool inline_math(vmIntrinsics::ID id); + bool inline_double_math(vmIntrinsics::ID id); template bool inline_math_overflow(Node* arg1, Node* arg2); void inline_math_mathExact(Node* math, Node* test); @@ -533,6 +534,9 @@ bool LibraryCallKit::try_to_inline(int predicate) { case vmIntrinsics::_dcos: case vmIntrinsics::_dtan: case vmIntrinsics::_dabs: + case vmIntrinsics::_fabs: + case vmIntrinsics::_iabs: + case vmIntrinsics::_labs: case vmIntrinsics::_datan2: case vmIntrinsics::_dsqrt: case vmIntrinsics::_dexp: @@ -1793,7 +1797,7 @@ Node* LibraryCallKit::round_double_node(Node* n) { // public static double Math.sqrt(double) // public static double Math.log(double) // public static double Math.log10(double) -bool LibraryCallKit::inline_math(vmIntrinsics::ID id) { +bool LibraryCallKit::inline_double_math(vmIntrinsics::ID id) { Node* arg = round_double_node(argument(0)); Node* n = NULL; switch (id) { @@ -1805,6 +1809,23 @@ bool LibraryCallKit::inline_math(vmIntrinsics::ID id) { return true; } 
+//------------------------------inline_math----------------------------------- +// public static float Math.abs(float) +// public static int Math.abs(int) +// public static long Math.abs(long) +bool LibraryCallKit::inline_math(vmIntrinsics::ID id) { + Node* arg = argument(0); + Node* n = NULL; + switch (id) { + case vmIntrinsics::_fabs: n = new AbsFNode( arg); break; + case vmIntrinsics::_iabs: n = new AbsINode( arg); break; + case vmIntrinsics::_labs: n = new AbsLNode( arg); break; + default: fatal_unexpected_iid(id); break; + } + set_result(_gvn.transform(n)); + return true; +} + //------------------------------runtime_math----------------------------- bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName) { assert(call_type == OptoRuntime::Math_DD_D_Type() || call_type == OptoRuntime::Math_D_D_Type(), @@ -1855,8 +1876,11 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) { runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10"); // These intrinsics are supported on all hardware - case vmIntrinsics::_dsqrt: return Matcher::match_rule_supported(Op_SqrtD) ? inline_math(id) : false; - case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_math(id) : false; + case vmIntrinsics::_dsqrt: return Matcher::match_rule_supported(Op_SqrtD) ? inline_double_math(id) : false; + case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_double_math(id) : false; + case vmIntrinsics::_fabs: return Matcher::match_rule_supported(Op_AbsF) ? inline_math(id) : false; + case vmIntrinsics::_iabs: return Matcher::match_rule_supported(Op_AbsI) ? inline_math(id) : false; + case vmIntrinsics::_labs: return Matcher::match_rule_supported(Op_AbsL) ? inline_math(id) : false; case vmIntrinsics::_dexp: return StubRoutines::dexp() != NULL ? 
diff --git a/src/hotspot/share/opto/subnode.hpp b/src/hotspot/share/opto/subnode.hpp index c29d9cdffc9..57736ba301e 100644 --- a/src/hotspot/share/opto/subnode.hpp +++ b/src/hotspot/share/opto/subnode.hpp @@ -350,6 +350,17 @@ public: virtual uint ideal_reg() const { return Op_RegI; } }; +//------------------------------AbsLNode--------------------------------------- +// Absolute value a long. Since a naive graph involves control flow, we +// "match" it in the ideal world (so the control flow can be removed). +class AbsLNode : public AbsNode { +public: + AbsLNode( Node *in1 ) : AbsNode(in1) {} + virtual int Opcode() const; + const Type *bottom_type() const { return TypeLong::LONG; } + virtual uint ideal_reg() const { return Op_RegL; } +}; + //------------------------------AbsFNode--------------------------------------- // Absolute value a float, a common float-point idiom with a cheap hardware // implemention on most chips. Since a naive graph involves control flow, we diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index dedadbf6e6f..d91c6943963 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -2453,6 +2453,7 @@ void SuperWord::output() { } } else if (opc == Op_SqrtF || opc == Op_SqrtD || opc == Op_AbsF || opc == Op_AbsD || + opc == Op_AbsI || opc == Op_AbsL || opc == Op_NegF || opc == Op_NegD || opc == Op_PopCountI) { assert(n->req() == 2, "only one input expected"); diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index 861dc627756..a7be8d945f8 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -70,8 +70,8 @@ int VectorNode::opcode(int sopc, BasicType bt) { return Op_SubVD; case Op_MulI: switch (bt) { - case T_BOOLEAN: - case T_BYTE: return 0; // Unimplemented + case T_BOOLEAN:return 0; + case T_BYTE: return Op_MulVB; case T_CHAR: case T_SHORT: return Op_MulVS; case T_INT: return Op_MulVI; 
@@ -104,6 +104,18 @@ int VectorNode::opcode(int sopc, BasicType bt) { case Op_DivD: assert(bt == T_DOUBLE, "must be"); return Op_DivVD; + case Op_AbsI: + switch (bt) { + case T_BOOLEAN: + case T_CHAR: return 0; // abs does not make sense for unsigned + case T_BYTE: return Op_AbsVB; + case T_SHORT: return Op_AbsVS; + case T_INT: return Op_AbsVI; + default: ShouldNotReachHere(); return 0; + } + case Op_AbsL: + assert(bt == T_LONG, "must be"); + return Op_AbsVL; case Op_AbsF: assert(bt == T_FLOAT, "must be"); return Op_AbsVF; @@ -350,6 +362,7 @@ VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, uint vlen, BasicType b case Op_SubVF: return new SubVFNode(n1, n2, vt); case Op_SubVD: return new SubVDNode(n1, n2, vt); + case Op_MulVB: return new MulVBNode(n1, n2, vt); case Op_MulVS: return new MulVSNode(n1, n2, vt); case Op_MulVI: return new MulVINode(n1, n2, vt); case Op_MulVL: return new MulVLNode(n1, n2, vt); @@ -359,6 +372,10 @@ VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, uint vlen, BasicType b case Op_DivVF: return new DivVFNode(n1, n2, vt); case Op_DivVD: return new DivVDNode(n1, n2, vt); + case Op_AbsVB: return new AbsVBNode(n1, vt); + case Op_AbsVS: return new AbsVSNode(n1, vt); + case Op_AbsVI: return new AbsVINode(n1, vt); + case Op_AbsVL: return new AbsVLNode(n1, vt); case Op_AbsVF: return new AbsVFNode(n1, vt); case Op_AbsVD: return new AbsVDNode(n1, vt); diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp index a49b69e99c7..84ba5190bd4 100644 --- a/src/hotspot/share/opto/vectornode.hpp +++ b/src/hotspot/share/opto/vectornode.hpp @@ -224,6 +224,14 @@ class SubVDNode : public VectorNode { virtual int Opcode() const; }; +//------------------------------MulVBNode-------------------------------------- +// Vector multiply byte +class MulVBNode : public VectorNode { + public: + MulVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {} + virtual int Opcode() const; +}; + 
//------------------------------MulVSNode-------------------------------------- // Vector multiply short class MulVSNode : public VectorNode { @@ -360,6 +368,38 @@ class DivVDNode : public VectorNode { virtual int Opcode() const; }; +//------------------------------AbsVBNode-------------------------------------- +// Vector Abs byte +class AbsVBNode : public VectorNode { +public: + AbsVBNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {} + virtual int Opcode() const; +}; + +//------------------------------AbsVSNode-------------------------------------- +// Vector Abs short +class AbsVSNode : public VectorNode { +public: + AbsVSNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {} + virtual int Opcode() const; +}; + +//------------------------------AbsVINode-------------------------------------- +// Vector Abs int +class AbsVINode : public VectorNode { +public: + AbsVINode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {} + virtual int Opcode() const; +}; + +//------------------------------AbsVLNode-------------------------------------- +// Vector Abs long +class AbsVLNode : public VectorNode { +public: + AbsVLNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {} + virtual int Opcode() const; +}; + //------------------------------AbsVFNode-------------------------------------- // Vector Abs float class AbsVFNode : public VectorNode { diff --git a/src/hotspot/share/runtime/vmStructs.cpp b/src/hotspot/share/runtime/vmStructs.cpp index f201c073cc3..3821cc7ff1d 100644 --- a/src/hotspot/share/runtime/vmStructs.cpp +++ b/src/hotspot/share/runtime/vmStructs.cpp @@ -1758,6 +1758,10 @@ typedef PaddedEnd PaddedObjectMonitor; declare_c2_type(ReverseBytesLNode, Node) \ declare_c2_type(ReductionNode, Node) \ declare_c2_type(VectorNode, Node) \ + declare_c2_type(AbsVBNode, VectorNode) \ + declare_c2_type(AbsVSNode, VectorNode) \ + declare_c2_type(AbsVINode, VectorNode) \ + declare_c2_type(AbsVLNode, VectorNode) \ declare_c2_type(AddVBNode, VectorNode) \ 
declare_c2_type(AddVSNode, VectorNode) \ declare_c2_type(AddVINode, VectorNode) \ @@ -1774,6 +1778,7 @@ typedef PaddedEnd PaddedObjectMonitor; declare_c2_type(SubVLNode, VectorNode) \ declare_c2_type(SubVFNode, VectorNode) \ declare_c2_type(SubVDNode, VectorNode) \ + declare_c2_type(MulVBNode, VectorNode) \ declare_c2_type(MulVSNode, VectorNode) \ declare_c2_type(MulVLNode, VectorNode) \ declare_c2_type(MulReductionVLNode, ReductionNode) \ @@ -1782,6 +1787,8 @@ typedef PaddedEnd PaddedObjectMonitor; declare_c2_type(MulVFNode, VectorNode) \ declare_c2_type(MulReductionVFNode, ReductionNode) \ declare_c2_type(MulVDNode, VectorNode) \ + declare_c2_type(NegVFNode, VectorNode) \ + declare_c2_type(NegVDNode, VectorNode) \ declare_c2_type(FmaVDNode, VectorNode) \ declare_c2_type(FmaVFNode, VectorNode) \ declare_c2_type(CMoveVFNode, VectorNode) \ diff --git a/src/java.base/share/classes/java/lang/Math.java b/src/java.base/share/classes/java/lang/Math.java index 46973bbeccb..518169bbf08 100644 --- a/src/java.base/share/classes/java/lang/Math.java +++ b/src/java.base/share/classes/java/lang/Math.java @@ -1353,6 +1353,7 @@ public final class Math { * @param a the argument whose absolute value is to be determined * @return the absolute value of the argument. */ + @HotSpotIntrinsicCandidate public static int abs(int a) { return (a < 0) ? -a : a; } @@ -1370,6 +1371,7 @@ public final class Math { * @param a the argument whose absolute value is to be determined * @return the absolute value of the argument. */ + @HotSpotIntrinsicCandidate public static long abs(long a) { return (a < 0) ? -a : a; } @@ -1394,6 +1396,7 @@ public final class Math { * @param a the argument whose absolute value is to be determined * @return the absolute value of the argument. */ + @HotSpotIntrinsicCandidate public static float abs(float a) { return (a <= 0.0F) ? 
0.0F - a : a; } diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/CheckGraalIntrinsics.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/CheckGraalIntrinsics.java index 0d0453cd71d..a9e389d81af 100644 --- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/CheckGraalIntrinsics.java +++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/CheckGraalIntrinsics.java @@ -398,6 +398,9 @@ public class CheckGraalIntrinsics extends GraalTest { if (isJDK13OrHigher()) { add(toBeInvestigated, + "java/lang/Math.abs(F)F", + "java/lang/Math.abs(I)I", + "java/lang/Math.abs(J)J", "java/lang/Math.max(DD)D", "java/lang/Math.max(FF)F", "java/lang/Math.min(DD)D", diff --git a/test/hotspot/jtreg/compiler/c2/cr6340864/TestDoubleVect.java b/test/hotspot/jtreg/compiler/c2/cr6340864/TestDoubleVect.java index 63f4d449f13..a4d3c610e35 100644 --- a/test/hotspot/jtreg/compiler/c2/cr6340864/TestDoubleVect.java +++ b/test/hotspot/jtreg/compiler/c2/cr6340864/TestDoubleVect.java @@ -86,6 +86,7 @@ public class TestDoubleVect { test_divc_n(a0, a1); test_divv(a0, a1, -VALUE); test_diva(a0, a1, a3); + test_negc(a0, a1); } // Test and verify results System.out.println("Verification"); @@ -339,6 +340,16 @@ public class TestDoubleVect { for (int i=12; i 0) @@ -469,6 +481,13 @@ public class TestFloatVect { end = System.currentTimeMillis(); System.out.println("test_diva_n: " + (end - start)); + start = System.currentTimeMillis(); + for (int i=0; i Date: Tue, 7 May 2019 14:05:09 -0700 Subject: [PATCH 5/7] 8223464: Improve version string for Oracle CI builds Reviewed-by: tbell --- make/autoconf/version-numbers | 1 + make/conf/jib-profiles.js | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git 
a/make/autoconf/version-numbers b/make/autoconf/version-numbers index 8e875a97d4b..2e5270839ad 100644 --- a/make/autoconf/version-numbers +++ b/make/autoconf/version-numbers @@ -37,6 +37,7 @@ DEFAULT_VERSION_CLASSFILE_MAJOR=57 # "`$EXPR $DEFAULT_VERSION_FEATURE + 44`" DEFAULT_VERSION_CLASSFILE_MINOR=0 DEFAULT_ACCEPTABLE_BOOT_VERSIONS="12 13" DEFAULT_JDK_SOURCE_TARGET_VERSION=13 +DEFAULT_PROMOTED_VERSION_PRE=ea LAUNCHER_NAME=openjdk PRODUCT_NAME=OpenJDK diff --git a/make/conf/jib-profiles.js b/make/conf/jib-profiles.js index 4a3cdc3536c..df39b8e8ba7 100644 --- a/make/conf/jib-profiles.js +++ b/make/conf/jib-profiles.js @@ -1301,10 +1301,16 @@ var versionArgs = function(input, common) { var args = ["--with-version-build=" + common.build_number]; if (input.build_type == "promoted") { args = concat(args, - // This needs to be changed when we start building release candidates - // with-version-pre must be set to ea for 'ea' and empty for fcs build - "--with-version-pre=ea", + "--with-version-pre=" + version_numbers.get("DEFAULT_PROMOTED_VERSION_PRE"), "--without-version-opt"); + } else if (input.build_type == "ci") { + var optString = input.build_id_data.ciBuildNumber; + var preString = input.build_id_data.projectName; + if (preString == "jdk") { + preString = version_numbers.get("DEFAULT_PROMOTED_VERSION_PRE"); + } + args = concat(args, "--with-version-pre=" + preString, + "--with-version-opt=" + optString); } else { args = concat(args, "--with-version-opt=" + common.build_id); } From b0d52bcca1c93be7809213ca567b9ed140588849 Mon Sep 17 00:00:00 2001 From: David Holmes Date: Tue, 7 May 2019 17:33:19 -0400 Subject: [PATCH 6/7] 8223437: Backout JDK-8219974 Restore static callsite resolution for the current class Reviewed-by: jwilhelm, vlivanov --- .../share/interpreter/interpreterRuntime.cpp | 18 ++++++--------- src/hotspot/share/oops/cpCache.cpp | 23 ++++++------------- src/hotspot/share/oops/cpCache.hpp | 6 ++--- src/hotspot/share/runtime/sharedRuntime.cpp | 20 
++++++---------- 4 files changed, 23 insertions(+), 44 deletions(-) diff --git a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp index c183a5f7360..cef442f2e5e 100644 --- a/src/hotspot/share/interpreter/interpreterRuntime.cpp +++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp @@ -920,23 +920,19 @@ void InterpreterRuntime::resolve_invoke(JavaThread* thread, Bytecodes::Code byte info.call_kind() == CallInfo::vtable_call, ""); } #endif + // Get sender or sender's unsafe_anonymous_host, and only set cpCache entry to resolved if + // it is not an interface. The receiver for invokespecial calls within interface + // methods must be checked for every call. + InstanceKlass* sender = pool->pool_holder(); + sender = sender->is_unsafe_anonymous() ? sender->unsafe_anonymous_host() : sender; switch (info.call_kind()) { - case CallInfo::direct_call: { - // Get sender or sender's unsafe_anonymous_host, and only set cpCache entry to resolved if - // it is not an interface. The receiver for invokespecial calls within interface - // methods must be checked for every call. - InstanceKlass* pool_holder = pool->pool_holder(); - InstanceKlass* sender = pool_holder->is_unsafe_anonymous() ? 
- pool_holder->unsafe_anonymous_host() : pool_holder; - + case CallInfo::direct_call: cp_cache_entry->set_direct_call( bytecode, info.resolved_method(), - sender->is_interface(), - pool_holder); + sender->is_interface()); break; - } case CallInfo::vtable_call: cp_cache_entry->set_vtable_call( bytecode, diff --git a/src/hotspot/share/oops/cpCache.cpp b/src/hotspot/share/oops/cpCache.cpp index edff74a9589..413d2576bb4 100644 --- a/src/hotspot/share/oops/cpCache.cpp +++ b/src/hotspot/share/oops/cpCache.cpp @@ -168,8 +168,7 @@ void ConstantPoolCacheEntry::set_parameter_size(int value) { void ConstantPoolCacheEntry::set_direct_or_vtable_call(Bytecodes::Code invoke_code, const methodHandle& method, int vtable_index, - bool sender_is_interface, - InstanceKlass* pool_holder) { + bool sender_is_interface) { bool is_vtable_call = (vtable_index >= 0); // FIXME: split this method on this boolean assert(method->interpreter_entry() != NULL, "should have been set at this point"); assert(!method->is_obsolete(), "attempt to write obsolete method to cpCache"); @@ -264,17 +263,9 @@ void ConstantPoolCacheEntry::set_direct_or_vtable_call(Bytecodes::Code invoke_co } // Don't mark invokestatic to method as resolved if the holder class has not yet completed // initialization. An invokestatic must only proceed if the class is initialized, but if - // we resolve it before then that class initialization check is skipped. However if the call - // is from the same class we can resolve as we must be executing with on our call stack. - if (invoke_code == Bytecodes::_invokestatic) { - if (!method->method_holder()->is_initialized() && - method->method_holder() != pool_holder) { - do_resolve = false; - } else { - assert(method->method_holder()->is_initialized() || - method->method_holder()->is_reentrant_initialization(Thread::current()), - "invalid class initialization state for invoke_static"); - } + // we resolve it before then that class initialization check is skipped. 
+ if (invoke_code == Bytecodes::_invokestatic && !method->method_holder()->is_initialized()) { + do_resolve = false; } if (do_resolve) { set_bytecode_1(invoke_code); @@ -319,17 +310,17 @@ void ConstantPoolCacheEntry::set_direct_or_vtable_call(Bytecodes::Code invoke_co } void ConstantPoolCacheEntry::set_direct_call(Bytecodes::Code invoke_code, const methodHandle& method, - bool sender_is_interface, InstanceKlass* pool_holder) { + bool sender_is_interface) { int index = Method::nonvirtual_vtable_index; // index < 0; FIXME: inline and customize set_direct_or_vtable_call - set_direct_or_vtable_call(invoke_code, method, index, sender_is_interface, pool_holder); + set_direct_or_vtable_call(invoke_code, method, index, sender_is_interface); } void ConstantPoolCacheEntry::set_vtable_call(Bytecodes::Code invoke_code, const methodHandle& method, int index) { // either the method is a miranda or its holder should accept the given index assert(method->method_holder()->is_interface() || method->method_holder()->verify_vtable_index(index), ""); // index >= 0; FIXME: inline and customize set_direct_or_vtable_call - set_direct_or_vtable_call(invoke_code, method, index, false, NULL /* not used */); + set_direct_or_vtable_call(invoke_code, method, index, false); } void ConstantPoolCacheEntry::set_itable_call(Bytecodes::Code invoke_code, diff --git a/src/hotspot/share/oops/cpCache.hpp b/src/hotspot/share/oops/cpCache.hpp index 8aa665fc440..d3d9cb6f02d 100644 --- a/src/hotspot/share/oops/cpCache.hpp +++ b/src/hotspot/share/oops/cpCache.hpp @@ -230,16 +230,14 @@ class ConstantPoolCacheEntry { Bytecodes::Code invoke_code, // the bytecode used for invoking the method const methodHandle& method, // the method/prototype if any (NULL, otherwise) int vtable_index, // the vtable index if any, else negative - bool sender_is_interface, // 'logical' sender (may be host of VMAC) - InstanceKlass* pool_holder // class from which the call is made + bool sender_is_interface ); public: void 
set_direct_call( // sets entry to exact concrete method entry Bytecodes::Code invoke_code, // the bytecode used for invoking the method const methodHandle& method, // the method to call - bool sender_is_interface, // 'logical' sender (may be host of VMAC) - InstanceKlass* pool_holder // class from which the call is made + bool sender_is_interface ); void set_vtable_call( // sets entry to vtable index diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp index 70e37516e9e..90d095d5087 100644 --- a/src/hotspot/share/runtime/sharedRuntime.cpp +++ b/src/hotspot/share/runtime/sharedRuntime.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1376,18 +1376,12 @@ methodHandle SharedRuntime::resolve_sub_helper(JavaThread *thread, } #endif - // Do not patch call site for static call to another class - // when the class is not fully initialized. - if (invoke_code == Bytecodes::_invokestatic) { - if (!callee_method->method_holder()->is_initialized() && - callee_method->method_holder() != caller_nm->method()->method_holder()) { - assert(callee_method->method_holder()->is_linked(), "must be"); - return callee_method; - } else { - assert(callee_method->method_holder()->is_initialized() || - callee_method->method_holder()->is_reentrant_initialization(thread), - "invalid class initialization state for invoke_static"); - } + // Do not patch call site for static call when the class is not + // fully initialized. 
+ if (invoke_code == Bytecodes::_invokestatic && + !callee_method->method_holder()->is_initialized()) { + assert(callee_method->method_holder()->is_linked(), "must be"); + return callee_method; } // JSR 292 key invariant: From 260ae30b14c8d523102a1353cea43942343c5f2f Mon Sep 17 00:00:00 2001 From: Coleen Phillimore Date: Tue, 7 May 2019 18:24:36 -0400 Subject: [PATCH 7/7] 8223481: gtest/GTestWrapper.java failed due to "assert(ret == 0) failed: sem_post failed; error='Invalid argument' (errno=EINVAL)" Remove the test for now. Reviewed-by: dholmes --- test/hotspot/gtest/oops/test_markOop.cpp | 140 ----------------------- 1 file changed, 140 deletions(-) delete mode 100644 test/hotspot/gtest/oops/test_markOop.cpp diff --git a/test/hotspot/gtest/oops/test_markOop.cpp b/test/hotspot/gtest/oops/test_markOop.cpp deleted file mode 100644 index 323937b4e50..00000000000 --- a/test/hotspot/gtest/oops/test_markOop.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
- * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -#include "precompiled.hpp" -#include "classfile/systemDictionary.hpp" -#include "memory/resourceArea.hpp" -#include "memory/universe.hpp" -#include "oops/oop.inline.hpp" -#include "runtime/atomic.hpp" -#include "runtime/interfaceSupport.inline.hpp" -#include "runtime/orderAccess.hpp" -#include "runtime/os.hpp" -#include "runtime/synchronizer.hpp" -#include "threadHelper.inline.hpp" -#include "unittest.hpp" -#include "utilities/globalDefinitions.hpp" -#include "utilities/ostream.hpp" - -// The test doesn't work for PRODUCT because it needs WizardMode -#ifndef PRODUCT -static bool test_pattern(stringStream* st, const char* pattern) { - return (strstr(st->as_string(), pattern) != NULL); -} - -static void assert_test_pattern(Handle object, const char* pattern) { - stringStream st; - object->print_on(&st); - ASSERT_TRUE(test_pattern(&st, pattern)) << pattern << " not in " << st.as_string(); -} - -static void assert_not_test_pattern(Handle object, const char* pattern) { - stringStream st; - object->print_on(&st); - ASSERT_FALSE(test_pattern(&st, pattern)) << pattern << " found in " << st.as_string(); -} - -class LockerThread : public JavaTestThread { - oop _obj; - public: - LockerThread(Semaphore* post, oop obj) : JavaTestThread(post), _obj(obj) {} - virtual ~LockerThread() {} - - void main_run() { - Thread* THREAD = Thread::current(); - HandleMark hm(THREAD); - Handle h_obj(THREAD, _obj); - ResourceMark rm(THREAD); - - // Wait gets the lock inflated. - // The object will stay locked for the context of 'ol' so the lock will - // still be inflated after the notify_all() call. Deflation can't happen - // while an ObjectMonitor is "busy" and being locked is the most "busy" - // state we have... 
- ObjectLocker ol(h_obj, THREAD); - ol.notify_all(THREAD); - assert_test_pattern(h_obj, "monitor"); - } -}; - - -TEST_VM(markOopDesc, printing) { - JavaThread* THREAD = JavaThread::current(); - ThreadInVMfromNative invm(THREAD); - ResourceMark rm(THREAD); - - oop obj = SystemDictionary::Byte_klass()->allocate_instance(THREAD); - - FlagSetting fs(WizardMode, true); - FlagSetting bf(UseBiasedLocking, true); - - HandleMark hm(THREAD); - Handle h_obj(THREAD, obj); - - // Biased locking is initially enabled for this java.lang.Byte object. - assert_test_pattern(h_obj, "is_biased"); - - // Lock using biased locking. - BasicObjectLock lock; - lock.set_obj(obj); - markOop mark = obj->mark()->incr_bias_epoch(); - obj->set_mark(mark); - ObjectSynchronizer::fast_enter(h_obj, lock.lock(), true, THREAD); -#ifdef _LP64 - // Look for the biased_locker in markOop, not prototype_header. - assert_not_test_pattern(h_obj, "mark(is_biased biased_locker=0x0000000000000000"); -#endif - - // Same thread tries to lock it again. - { - ObjectLocker ol(h_obj, THREAD); - assert_test_pattern(h_obj, "locked"); - } - - // This is no longer biased, because ObjectLocker revokes the bias. - assert_test_pattern(h_obj, "is_neutral no_hash"); - - // Wait gets the lock inflated. - { - ObjectLocker ol(h_obj, THREAD); - - Semaphore done(0); - LockerThread* st; - st = new LockerThread(&done, h_obj()); - st->doit(); - - ol.wait(THREAD); - assert_test_pattern(h_obj, "monitor"); - } - - // Make the object older. Not all GCs use this field. - Universe::heap()->collect(GCCause::_java_lang_system_gc); - if (UseParallelGC) { - assert_test_pattern(h_obj, "is_neutral no_hash age 1"); - } - - // Hash the object then print it. - intx hash = h_obj->identity_hash(); - assert_test_pattern(h_obj, "is_neutral hash=0x"); -} -#endif // PRODUCT