8285081: Improve XPath operators count accuracy

Reviewed-by: naoto, lancea
This commit is contained in:
Joe Wang 2022-06-07 20:49:45 +00:00
parent b12e7f1bf9
commit 8e07839179
3 changed files with 60 additions and 23 deletions

View File

@ -137,7 +137,7 @@ import java.util.Stack;
* @see com.sun.java_cup.internal.runtime.virtual_parse_stack
* @author Frank Flannery
*
* @LastModified: Jan 2022
* @LastModified: June 2022
*/
public abstract class lr_parser {
@ -149,6 +149,7 @@ public abstract class lr_parser {
private int grpCount = 0;
private int opCount = 0;
private int totalOpCount = 0;
private int lastSym;
/*-----------------------------------------------------------*/
/*--- Constructor(s) ----------------------------------------*/
@ -377,13 +378,17 @@ public abstract class lr_parser {
opCount++; // function
isLiteral = false;
} else if (contains(sym.OPERATORS, s.sym)) {
// An axis and its node test are counted as a single step, so the operator
// is not counted when the previous symbol was DCOLON.
if (lastSym != sym.DCOLON) {
opCount++;
}
isLiteral = false;
}
if (s.sym == sym.Literal || s.sym == sym.QNAME) {
isLiteral = true;
}
lastSym = s.sym;
return s;
}
@ -588,6 +593,7 @@ public abstract class lr_parser {
isLiteral = false;
grpCount = 0;
opCount = 0;
lastSym = -1;
/* get the first token */
cur_token = scan();

View File

@ -25,13 +25,11 @@
package com.sun.org.apache.xalan.internal.xsltc.compiler;
import java.util.Arrays;
/**
* CUP generated class containing symbol constants.
* This class was generated by CUP v0.10j on Fri Feb 27 13:01:50 PST 2004.
*
* @LastModified: Jan 2022
* @LastModified: June 2022
*/
public class sym {
/* terminals */
@ -92,9 +90,11 @@ public class sym {
/*
AXES: count once at DCOLON,
these axes names are therefore not counted:
NAMESPACE, FOLLOWINGSIBLING, CHILD, DESCENDANTORSELF, DESCENDANT
, PRECEDINGSIBLING, SELF, ANCESTORORSELF, PRECEDING, ANCESTOROR, PARENT, FOLLOWING, ATTRIBUTE
NAMESPACE, FOLLOWINGSIBLING, CHILD, DESCENDANTORSELF, DESCENDANT,
PRECEDINGSIBLING, SELF, ANCESTORORSELF, PRECEDING, ANCESTOR, PARENT,
FOLLOWING, ATTRIBUTE
*/
public static final int[] OPERATORS = {GE, SLASH, ATSIGN, LPAREN, DCOLON,
MINUS, STAR, LT, OR, DIV, PLUS, LE, VBAR, MOD, EQ, LBRACK, DOLLAR, NE, GT};
public static final int[] OPERATORS = {GT, GE, EQ, NE, LT, LE, SLASH, DSLASH,
DOT, DDOT, ATSIGN, DCOLON, PLUS, MINUS, STAR, DIV, MOD, AND, OR, LPAREN,
LBRACK, VBAR, DOLLAR, NODE, TEXT, PI, PIPARAM};
}

View File

@ -32,7 +32,7 @@ import jdk.xml.internal.XMLSecurityManager.Limit;
* This class is in charge of lexical processing of the XPath
* expression into tokens.
*
* @LastModified: Apr 2022
* @LastModified: June 2022
*/
class Lexer
{
@ -155,6 +155,7 @@ class Lexer
boolean isStartOfPat = true;
boolean isAttrName = false;
boolean isNum = false;
boolean isAxis = false;
// Nesting of '[' so we can know if the given element should be
// counted inside the m_patternMap.
@ -254,8 +255,7 @@ class Lexer
// check operator symbol
String s = pat.substring(startSubstring, i);
if (Token.contains(s)) {
m_opCount++;
isLiteral = false;
incrementCount();
}
addToTokenQueue(s);
}
@ -339,23 +339,45 @@ class Lexer
{
nesting--;
}
else if ((Token.LPAREN == c) || (Token.LBRACK == c))
else if (Token.LBRACK == c)
{
nesting++;
if (!isLiteral && (Token.LPAREN == c)) {
m_grpCount++;
m_opCount++;
isLiteral = false;
incrementCount();
isAxis = false;
}
else if ((Token.LPAREN == c))
{
nesting++;
if (isLiteral) {
if (!isAxis) {
incrementCount();
}
} else {
m_grpCount++;
incrementCount();
}
isAxis = false;
}
if ((Token.GT == c || Token.LT == c || Token.EQ == c) && Token.EQ != peekNext(pat, i)) {
m_opCount++;
isLiteral = false;
if ((Token.GT == c || Token.LT == c || Token.EQ == c || Token.EM == c)) {
if (Token.EQ != peekNext(pat, i)) {
incrementCount();
}
else if ((Token.LPAREN != c) && (Token.RPAREN != c) && (Token.RBRACK != c)) {
m_opCount++;
isLiteral = false;
}
else if (Token.SLASH == c) {
isAxis = false;
if (Token.SLASH != peekNext(pat, i)) {
incrementCount();
}
}
// '(' and '[' already counted above; ':' is examined in case below
// ',' is part of a function
else if ((Token.LPAREN != c) && (Token.LBRACK != c) && (Token.RPAREN != c)
&& (Token.RBRACK != c) && (Token.COLON != c) && (Token.COMMA != c)) {
if (Token.STAR != c || !isAxis) {
incrementCount();
}
isAxis = false;
}
addToTokenQueue(pat.substring(i, i + 1));
@ -376,6 +398,7 @@ class Lexer
startSubstring = -1;
posOfNSSep = -1;
m_opCount++;
isAxis = true;
addToTokenQueue(pat.substring(i - 1, i + 1));
break;
@ -389,6 +412,9 @@ class Lexer
// fall through on purpose
default :
isLiteral = true;
if (!isNum && Token.DOT == c && Token.DOT != peekNext(pat, i)) {
incrementCount();
}
if (-1 == startSubstring)
{
startSubstring = i;
@ -443,6 +469,11 @@ class Lexer
m_processor.m_queueMark = 0;
}
/**
 * Registers one more operator for the expression being tokenized.
 * Resets the {@code isLiteral} flag and bumps {@code m_opCount} by one,
 * so callers do not have to repeat both updates at every counting site.
 */
private void incrementCount() {
    // The two updates are independent of each other.
    isLiteral = false;
    m_opCount += 1;
}
/**
* Peeks at the next character without advancing the index.
* @param s the input string