8270646: Improved scanning of XML entities

Reviewed-by: naoto, lancea, mschoene, rhalade
This commit is contained in:
Joe Wang 2021-08-17 04:34:17 +00:00 committed by Henry Jen
parent fa47c368d4
commit b02ea6dc3c
6 changed files with 209 additions and 549 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -54,7 +54,7 @@ import com.sun.org.apache.xerces.internal.xni.XNIException;
* @author Andy Clark, IBM
* @author Arnaud Le Hors, IBM
* @author Eric Ye, IBM
*
* @LastModified: Aug 2021
*/
public class XML11DocumentScannerImpl
extends XMLDocumentScannerImpl {
@ -278,16 +278,6 @@ public class XML11DocumentScannerImpl
+ fStringBuffer.toString() + "\"");
}
}
// note that none of these characters should ever get through
// XML11EntityScanner. Not sure why
// this check was originally necessary. - NG
else if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
fEntityScanner.scanChar(null);
fStringBuffer.append(' ');
if (entityDepth == fEntityDepth) {
fStringBuffer2.append('\n');
}
}
else if (c != -1 && XMLChar.isHighSurrogate(c)) {
fStringBuffer3.clear();
if (scanSurrogates(fStringBuffer3)) {

View File

@ -21,6 +21,7 @@
package com.sun.org.apache.xerces.internal.impl;
import static com.sun.org.apache.xerces.internal.impl.Constants.XML_VERSION_1_1;
import com.sun.org.apache.xerces.internal.impl.XMLScanner.NameType;
import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
import com.sun.org.apache.xerces.internal.util.XML11Char;
@ -40,7 +41,7 @@ import java.io.IOException;
* @author Michael Glavassevich, IBM
* @author Neil Graham, IBM
*
* @LastModified: Apr 2021
* @LastModified: Aug 2021
*/
public class XML11EntityScanner
@ -116,7 +117,7 @@ public class XML11EntityScanner
load(1, false, false);
offset = 0;
}
if (c == '\r' && external) {
if (c == '\r' && external && fCurrentEntity.position < fCurrentEntity.count) {
int cc = fCurrentEntity.ch[fCurrentEntity.position++];
if (cc != '\n' && cc != 0x85) {
fCurrentEntity.position--;
@ -761,71 +762,12 @@ public class XML11EntityScanner
}
// normalize newlines
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
boolean counted = false;
boolean external = fCurrentEntity.isExternal();
if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if ((c == '\r' ) && external) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
checkEntityLimit(null, fCurrentEntity, offset, newlines);
offset = 0;
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
fCurrentEntity.position = newlines;
fCurrentEntity.startPosition = newlines;
if (load(newlines, false, true)) {
counted = true;
break;
}
}
int cc = fCurrentEntity.ch[fCurrentEntity.position];
if (cc == '\n' || cc == 0x85) {
fCurrentEntity.position++;
offset++;
}
/*** NEWLINE NORMALIZATION ***/
else {
newlines++;
}
}
else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
checkEntityLimit(null, fCurrentEntity, offset, newlines);
offset = 0;
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
fCurrentEntity.position = newlines;
fCurrentEntity.startPosition = newlines;
if (load(newlines, false, true)) {
counted = true;
break;
}
}
}
else {
fCurrentEntity.position--;
break;
}
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
for (int i = offset; i < fCurrentEntity.position; i++) {
fCurrentEntity.ch[i] = '\n';
}
int length = fCurrentEntity.position - offset;
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
checkEntityLimit(null, fCurrentEntity, offset, length);
content.setValues(fCurrentEntity.ch, offset, length);
return -1;
}
if (normalizeNewlines(XML_VERSION_1_1, content, false, false, null)) {
return -1;
}
int c;
boolean external = fCurrentEntity.isExternal();
// inner loop, scanning for content
if (external) {
while (fCurrentEntity.position < fCurrentEntity.count) {
@ -913,65 +855,12 @@ public class XML11EntityScanner
}
// normalize newlines
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
boolean external = fCurrentEntity.isExternal();
if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if ((c == '\r' ) && external) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
fCurrentEntity.position = newlines;
fCurrentEntity.startPosition = newlines;
if (load(newlines, false, true)) {
break;
}
}
int cc = fCurrentEntity.ch[fCurrentEntity.position];
if (cc == '\n' || cc == 0x85) {
fCurrentEntity.position++;
offset++;
}
/*** NEWLINE NORMALIZATION ***/
else {
newlines++;
}
}
else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
fCurrentEntity.position = newlines;
fCurrentEntity.startPosition = newlines;
if (load(newlines, false, true)) {
break;
}
}
}
else {
fCurrentEntity.position--;
break;
}
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
for (int i = offset; i < fCurrentEntity.position; i++) {
fCurrentEntity.ch[i] = '\n';
}
int length = fCurrentEntity.position - offset;
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
content.setValues(fCurrentEntity.ch, offset, length);
return -1;
}
if (normalizeNewlines(XML_VERSION_1_1, content, false, true, null)) {
return -1;
}
int c;
boolean external = fCurrentEntity.isExternal();
// scan literal value
if (external) {
while (fCurrentEntity.position < fCurrentEntity.count) {
@ -1093,66 +982,11 @@ public class XML11EntityScanner
}
// normalize newlines
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if ((c == '\r' ) && external) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
fCurrentEntity.position = newlines;
fCurrentEntity.startPosition = newlines;
if (load(newlines, false, true)) {
break;
}
}
int cc = fCurrentEntity.ch[fCurrentEntity.position];
if (cc == '\n' || cc == 0x85) {
fCurrentEntity.position++;
offset++;
}
/*** NEWLINE NORMALIZATION ***/
else {
newlines++;
}
}
else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
fCurrentEntity.position = newlines;
fCurrentEntity.startPosition = newlines;
fCurrentEntity.count = newlines;
if (load(newlines, false, true)) {
break;
}
}
}
else {
fCurrentEntity.position--;
break;
}
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
for (int i = offset; i < fCurrentEntity.position; i++) {
fCurrentEntity.ch[i] = '\n';
}
int length = fCurrentEntity.position - offset;
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
buffer.append(fCurrentEntity.ch, offset, length);
return true;
}
if (normalizeNewlines(XML_VERSION_1_1, buffer, true, false, NameType.COMMENT)) {
return true;
}
int c;
// iterate over buffer looking for delimiter
OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
c = fCurrentEntity.ch[fCurrentEntity.position++];
@ -1256,22 +1090,6 @@ public class XML11EntityScanner
checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
return true;
}
else if (c == '\n' && (cc == '\r' ) && fCurrentEntity.isExternal()) {
// handle newlines
if (fCurrentEntity.position == fCurrentEntity.count) {
invokeListeners(1);
fCurrentEntity.ch[0] = (char)cc;
load(1, false, false);
}
int ccc = fCurrentEntity.ch[++fCurrentEntity.position];
if (ccc == '\n' || ccc == 0x85) {
fCurrentEntity.position++;
}
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
return true;
}
// character was not skipped
return false;

View File

@ -21,6 +21,8 @@
package com.sun.org.apache.xerces.internal.impl;
import static com.sun.org.apache.xerces.internal.impl.Constants.XML_VERSION_1_0;
import static com.sun.org.apache.xerces.internal.impl.Constants.XML_VERSION_1_1;
import com.sun.org.apache.xerces.internal.impl.XMLScanner.NameType;
import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
@ -55,7 +57,7 @@ import java.util.Locale;
* @author Arnaud Le Hors, IBM
* @author K.Venugopal Sun Microsystems
*
* @LastModified: Apr 2021
* @LastModified: Aug 2021
*/
public class XMLEntityScanner implements XMLLocator {
@ -149,6 +151,15 @@ public class XMLEntityScanner implements XMLLocator {
// indicates that the operation is for detecting XML version
boolean detectingVersion = false;
// offset of the current cursor position
int offset = 0;
// number of newlines in the current process
int newlines = 0;
// indicating whether the content has been counted towards limit
boolean counted = false;
//
// Constructors
//
@ -553,7 +564,7 @@ public class XMLEntityScanner implements XMLLocator {
}
// scan character
int offset = fCurrentEntity.position;
offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c == '\n' || (c == '\r' && isExternal)) {
fCurrentEntity.lineNumber++;
@ -561,10 +572,10 @@ public class XMLEntityScanner implements XMLLocator {
if (fCurrentEntity.position == fCurrentEntity.count) {
invokeListeners(1);
fCurrentEntity.ch[0] = (char)c;
load(1, false, false);
load(1, true, false);
offset = 0;
}
if (c == '\r' && isExternal) {
if (c == '\r' && isExternal && fCurrentEntity.position < fCurrentEntity.count) {
if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
fCurrentEntity.position--;
}
@ -614,7 +625,7 @@ public class XMLEntityScanner implements XMLLocator {
}
// scan nmtoken
int offset = fCurrentEntity.position;
offset = fCurrentEntity.position;
boolean vc = false;
char c;
while (true){
@ -695,7 +706,7 @@ public class XMLEntityScanner implements XMLLocator {
}
// scan name
int offset = fCurrentEntity.position;
offset = fCurrentEntity.position;
int length;
if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
if (++fCurrentEntity.position == fCurrentEntity.count) {
@ -788,7 +799,7 @@ public class XMLEntityScanner implements XMLLocator {
}
// scan qualified name
int offset = fCurrentEntity.position;
offset = fCurrentEntity.position;
//making a check if the specified character is a valid name start character
//as defined by production [5] in the XML 1.0 specification.
@ -1043,81 +1054,11 @@ public class XMLEntityScanner implements XMLLocator {
}
// normalize newlines
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
boolean counted = false;
if (c == '\n' || (c == '\r' && isExternal)) {
if (DEBUG_BUFFER) {
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c == '\r' && isExternal) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
checkEntityLimit(null, fCurrentEntity, offset, newlines);
offset = 0;
fCurrentEntity.position = newlines;
if (load(newlines, false, true)) {
counted = true;
break;
}
}
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
fCurrentEntity.position++;
offset++;
}
/*** NEWLINE NORMALIZATION ***/
else {
newlines++;
}
} else if (c == '\n') {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
checkEntityLimit(null, fCurrentEntity, offset, newlines);
offset = 0;
fCurrentEntity.position = newlines;
if (load(newlines, false, true)) {
counted = true;
break;
}
}
} else {
fCurrentEntity.position--;
break;
}
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
for (int i = offset; i < fCurrentEntity.position; i++) {
fCurrentEntity.ch[i] = '\n';
}
int length = fCurrentEntity.position - offset;
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
checkEntityLimit(null, fCurrentEntity, offset, length);
//CHANGED: dont replace the value.. append to the buffer. This gives control to the callee
//on buffering the data..
content.setValues(fCurrentEntity.ch, offset, length);
//content.append(fCurrentEntity.ch, offset, length);
if (DEBUG_BUFFER) {
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
return -1;
}
if (DEBUG_BUFFER) {
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
if (normalizeNewlines(XML_VERSION_1_0, content, false, false, null)) {
return -1;
}
int c;
while (fCurrentEntity.position < fCurrentEntity.count) {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if (!XMLChar.isContent(c)) {
@ -1202,85 +1143,14 @@ public class XMLEntityScanner implements XMLLocator {
}
// normalize newlines
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
if(whiteSpaceInfoNeeded)
whiteSpaceLen=0;
if (c == '\n' || (c == '\r' && isExternal)) {
if (DEBUG_BUFFER) {
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c == '\r' && isExternal) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.position = newlines;
if (load(newlines, false, true)) {
break;
}
}
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
fCurrentEntity.position++;
offset++;
}
/*** NEWLINE NORMALIZATION ***/
else {
newlines++;
}
/***/
} else if (c == '\n') {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.position = newlines;
if (load(newlines, false, true)) {
break;
}
}
/*** NEWLINE NORMALIZATION ***
* if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
* && external) {
* fCurrentEntity.position++;
* offset++;
* }
* /***/
} else {
fCurrentEntity.position--;
break;
}
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
int i=0;
for ( i = offset; i < fCurrentEntity.position; i++) {
fCurrentEntity.ch[i] = '\n';
storeWhiteSpace(i);
}
int length = fCurrentEntity.position - offset;
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
content.setValues(fCurrentEntity.ch, offset, length);
if (DEBUG_BUFFER) {
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
return -1;
}
if (DEBUG_BUFFER) {
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
if (normalizeNewlines(XML_VERSION_1_0, content, false, true, null)) {
return -1;
}
int c;
// scan literal value
for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
c = fCurrentEntity.ch[fCurrentEntity.position];
@ -1331,7 +1201,7 @@ public class XMLEntityScanner implements XMLLocator {
*
* @param whiteSpacePos position of a whitespace in the scanner entity buffer
*/
private void storeWhiteSpace(int whiteSpacePos) {
void storeWhiteSpace(int whiteSpacePos) {
if (whiteSpaceLen >= whiteSpaceLookup.length) {
int [] tmp = new int[whiteSpaceLookup.length + 100];
System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length);
@ -1415,75 +1285,11 @@ public class XMLEntityScanner implements XMLLocator {
return false;
}
// normalize newlines
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
if (c == '\n' || (c == '\r' && isExternal)) {
if (DEBUG_BUFFER) {
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if (c == '\r' && isExternal) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.position = newlines;
if (load(newlines, false, true)) {
break;
}
}
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
fCurrentEntity.position++;
offset++;
}
/*** NEWLINE NORMALIZATION ***/
else {
newlines++;
}
} else if (c == '\n') {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
offset = 0;
fCurrentEntity.position = newlines;
fCurrentEntity.count = newlines;
if (load(newlines, false, true)) {
break;
}
}
} else {
fCurrentEntity.position--;
break;
}
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
for (int i = offset; i < fCurrentEntity.position; i++) {
fCurrentEntity.ch[i] = '\n';
}
int length = fCurrentEntity.position - offset;
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
buffer.append(fCurrentEntity.ch, offset, length);
if (DEBUG_BUFFER) {
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
return true;
}
if (DEBUG_BUFFER) {
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
print();
System.out.println();
}
if (normalizeNewlines(XML_VERSION_1_0, buffer, true, false, NameType.COMMENT)) {
return true;
}
int c;
// iterate over buffer looking for delimiter
OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
c = fCurrentEntity.ch[fCurrentEntity.position++];
@ -1570,7 +1376,7 @@ public class XMLEntityScanner implements XMLLocator {
}
// skip character
int offset = fCurrentEntity.position;
offset = fCurrentEntity.position;
int cc = fCurrentEntity.ch[fCurrentEntity.position];
if (cc == c) {
fCurrentEntity.position++;
@ -1587,26 +1393,6 @@ public class XMLEntityScanner implements XMLLocator {
}
checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
return true;
} else if (c == '\n' && cc == '\r' && isExternal) {
// handle newlines
if (fCurrentEntity.position == fCurrentEntity.count) {
invokeListeners(1);
fCurrentEntity.ch[0] = (char)cc;
load(1, false, false);
}
fCurrentEntity.position++;
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
fCurrentEntity.position++;
}
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (DEBUG_BUFFER) {
System.out.print(")skipChar, '"+(char)c+"': ");
print();
System.out.println(" -> true");
}
checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
return true;
}
// character was not skipped
@ -1659,7 +1445,7 @@ public class XMLEntityScanner implements XMLLocator {
// skip spaces
int c = fCurrentEntity.ch[fCurrentEntity.position];
int offset = fCurrentEntity.position - 1;
offset = fCurrentEntity.position - 1;
if (XMLChar.isSpace(c)) {
do {
boolean entityChanged = false;
@ -2332,5 +2118,86 @@ public class XMLEntityScanner implements XMLLocator {
} // skipDeclSpaces():boolean
/**
* Normalizes newlines. As specified in the XML specification, 2.11 End-of-Line
* Handling, this method converts the newline sequences '\n', '\r' and '\r\n'
* to '\n'. When {@code version} is {@code XML_VERSION_1_1}, the characters
* 0x85 and 0x2028, and the sequence '\r' followed by 0x85, are treated as
* newlines as well.
* Further, it may put them in a cache for later process as needed, for example
* as specified in 3.3.3 Attribute-Value Normalization.
*
* <p>The method resets and then updates the scanner fields {@code offset},
* {@code newlines} and {@code counted}, and advances the line/column
* information of the current entity for every newline it consumes.
*
* @implNote this method does not limit to processing external parsed entities
* as 2.11 required. It handles all cases where newlines need to be processed.
*
* @param version the XML version whose newline rules apply; it is compared
* against {@code XML_VERSION_1_1} to enable the additional 1.1 newline characters
* @param buffer the current content buffer
* @param append a flag indicating whether to append to the buffer
* ({@code buffer.append}) or to replace its content ({@code buffer.setValues})
* @param storeWS a flag indicating whether the whitespaces need to be stored
* for later processing; when set, the index of every normalized newline is
* passed to {@code storeWhiteSpace}
* @param nt the type of the entity, passed through to {@code checkEntityLimit}
* @return true if the cursor is at the end of the current entity, false otherwise.
* More precisely, true is returned only when newlines were consumed and the
* cursor then sits on the last loaded character ({@code position == count - 1});
* in that case the normalized newlines have already been written to {@code buffer}.
* @throws IOException presumably raised by {@code load} when refilling the
* buffer fails (NOTE(review): confirm against load())
*/
protected boolean normalizeNewlines(short version, XMLString buffer, boolean append,
boolean storeWS, NameType nt)
throws IOException {
// normalize newlines
// remember where the run of newlines starts and peek at the current character
offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
newlines = 0;
// how this information is used is determined by the caller of this method
counted = false;
// only enter the normalization loop when the cursor is on a newline character
if ((c == '\n' || c == '\r') ||
(version == XML_VERSION_1_1 && (c == 0x85 || c == 0x2028))) {
do {
// consume one character
c = fCurrentEntity.ch[fCurrentEntity.position++];
if ((c == '\n' || c == '\r') ||
(version == XML_VERSION_1_1 && (c == 0x85 || c == 0x2028))) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
// the loaded data is exhausted: check the newlines seen so far against
// the limit, then restart at the front of the buffer and reload
if (fCurrentEntity.position == fCurrentEntity.count) {
checkEntityLimit(nt, fCurrentEntity, offset, newlines);
offset = 0;
fCurrentEntity.position = newlines;
// NOTE(review): presumably load() refills the buffer starting at index
// 'newlines' and returns true when the entity changed - confirm
if (load(newlines, false, true)) {
// the limit check above already covered these characters
counted = true;
break;
}
}
if (c == '\r') {
// a '\n' (or 0x85 in XML 1.1) directly after '\r' belongs to the same
// line break: skip it and move offset forward so that the pair
// contributes a single '\n' to the output range
int cc = fCurrentEntity.ch[fCurrentEntity.position];
if (cc == '\n' || (version == XML_VERSION_1_1 && cc == 0x85)) {
fCurrentEntity.position++;
offset++;
}
/*** NEWLINE NORMALIZATION ***/
// NOTE(review): for a lone '\r' the counter is incremented a second
// time here - confirm this is intended
else {
newlines++;
}
}
} else {
// not a newline: un-read the character and stop
fCurrentEntity.position--;
break;
}
// stop one character short of the end of the loaded data
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
// overwrite the consumed range with '\n', recording positions if requested
for (int i = offset; i < fCurrentEntity.position; i++) {
fCurrentEntity.ch[i] = '\n';
if (storeWS) {
storeWhiteSpace(i);
}
}
int length = fCurrentEntity.position - offset;
// the cursor reached the last loaded character: hand the normalized
// newlines to the caller's buffer and report it
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
checkEntityLimit(nt, fCurrentEntity, offset, length);
if (append) {
buffer.append(fCurrentEntity.ch, offset, length);
} else {
buffer.setValues(fCurrentEntity.ch, offset, length);
}
return true;
}
}
return false;
}
} // class XMLEntityScanner

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -67,7 +67,7 @@ import com.sun.xml.internal.stream.Entity;
* @author Eric Ye, IBM
* @author K.Venugopal SUN Microsystems
* @author Sunitha Reddy, SUN Microsystems
* @LastModified: Feb 2020
* @LastModified: Aug 2021
*/
public abstract class XMLScanner
implements XMLComponent {
@ -956,12 +956,6 @@ public abstract class XMLScanner
System.out.println("** valueF: \""
+ stringBuffer.toString() + "\"");
}
} else if (c == '\n' || c == '\r') {
fEntityScanner.scanChar(null);
stringBuffer.append(' ');
if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
fStringBuffer2.append('\n');
}
} else if (c != -1 && XMLChar.isHighSurrogate(c)) {
fStringBuffer3.clear();
if (scanSurrogates(fStringBuffer3)) {

View File

@ -1,6 +1,5 @@
/*
* reserved comment block
* DO NOT REMOVE OR ALTER!
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -41,19 +40,11 @@ import com.sun.org.apache.xerces.internal.xni.XMLString;
*
* @author Andy Clark, IBM
* @author Eric Ye, IBM
*
* @LastModified: Aug 2021
*/
public class XMLStringBuffer
extends XMLString {
//
// Constants
//
/** Default buffer size (32). */
public static final int DEFAULT_SIZE = 32;
//
// Data
//
@ -112,79 +103,4 @@ extends XMLString {
length = 0;
}
/**
 * Appends a single character, enlarging the backing array when it is full.
 *
 * @param c the character to add at the end of the buffer
 */
public void append(char c) {
    final int capacity = ch.length;
    if (length + 1 > capacity) {
        // double the array, but always gain at least DEFAULT_SIZE slots
        final int grown = Math.max(capacity * 2, capacity + DEFAULT_SIZE);
        final char[] expanded = new char[grown];
        System.arraycopy(ch, 0, expanded, 0, length);
        ch = expanded;
    }
    ch[length] = c;
    length++;
} // append(char)
/**
 * Appends all characters of a string, enlarging the backing array when
 * the string does not fit.
 *
 * @param s the string whose characters are added at the end of the buffer
 */
public void append(String s) {
    final int added = s.length();
    final int capacity = this.ch.length;
    if (this.length + added > capacity) {
        // double the array, or grow by the string length plus DEFAULT_SIZE,
        // whichever is larger
        final int grown = Math.max(capacity * 2, capacity + added + DEFAULT_SIZE);
        final char[] expanded = new char[grown];
        System.arraycopy(this.ch, 0, expanded, 0, this.length);
        this.ch = expanded;
    }
    s.getChars(0, added, this.ch, this.length);
    this.length += added;
} // append(String)
/**
 * Appends a range of characters taken from an array.
 *
 * @param ch the array to copy from; nothing is copied when it is null
 * @param offset index of the first character to copy
 * @param length number of characters to copy; nothing is copied unless
 *               it is positive
 */
public void append(char[] ch, int offset, int length) {
    final int capacity = this.ch.length;
    if (this.length + length > capacity) {
        // double the array, or grow by the requested length plus
        // DEFAULT_SIZE, whichever is larger
        final int grown = Math.max(capacity * 2, capacity + length + DEFAULT_SIZE);
        final char[] expanded = new char[grown];
        System.arraycopy(this.ch, 0, expanded, 0, this.length);
        this.ch = expanded;
    }
    // tolerate null or empty input: only copy when there is something to add
    if (ch == null || length <= 0) {
        return;
    }
    System.arraycopy(ch, offset, this.ch, this.length, length);
    this.length += length;
} // append(char[],int,int)
/**
 * Appends the characters described by another {@code XMLString}.
 *
 * @param s the string whose {@code ch}, {@code offset} and {@code length}
 *          select the characters to add
 */
public void append(XMLString s) {
    final char[] source = s.ch;
    final int start = s.offset;
    final int count = s.length;
    append(source, start, count);
} // append(XMLString)
} // class XMLStringBuffer

View File

@ -1,6 +1,5 @@
/*
* reserved comment block
* DO NOT REMOVE OR ALTER!
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -42,9 +41,11 @@ package com.sun.org.apache.xerces.internal.xni;
*
* @author Eric Ye, IBM
* @author Andy Clark, IBM
*
* @LastModified: Aug 2021
*/
public class XMLString {
/** Default buffer size (32). */
public static final int DEFAULT_SIZE = 32;
//
// Data
@ -189,4 +190,78 @@ public class XMLString {
return length > 0 ? new String(ch, offset, length) : "";
} // toString():String
/**
 * Appends a char to the buffer, growing the backing array if it is full.
 *
 * @param c the char
 */
public void append(char c) {
    final int required = this.length + 1;
    if (required > this.ch.length) {
        final int current = this.ch.length;
        // grow to twice the size, and by no less than DEFAULT_SIZE
        int newLength = current * 2;
        final int minimum = current + DEFAULT_SIZE;
        if (newLength < minimum) {
            newLength = minimum;
        }
        final char[] larger = new char[newLength];
        System.arraycopy(this.ch, 0, larger, 0, this.length);
        this.ch = larger;
    }
    this.ch[this.length++] = c;
} // append(char)
/**
 * Appends a string to the buffer, growing the backing array if the
 * string does not fit.
 *
 * @param s the string
 */
public void append(String s) {
    final int count = s.length();
    if (this.length + count > this.ch.length) {
        final int current = this.ch.length;
        // grow to twice the size, and by no less than count + DEFAULT_SIZE
        int newLength = current * 2;
        final int minimum = current + count + DEFAULT_SIZE;
        if (newLength < minimum) {
            newLength = minimum;
        }
        final char[] larger = new char[newLength];
        System.arraycopy(this.ch, 0, larger, 0, this.length);
        this.ch = larger;
    }
    s.getChars(0, count, this.ch, this.length);
    this.length += count;
} // append(String)
/**
 * Appends a number of characters to the buffer.
 *
 * <p>A {@code null} array or a non-positive {@code length} is treated as
 * "nothing to append": the buffer, including its capacity, is left untouched.
 *
 * @param ch the char array to copy from, may be {@code null}
 * @param offset the index in {@code ch} of the first char to copy
 * @param length the number of chars to copy
 */
public void append(char[] ch, int offset, int length) {
    // Check for empty input before growing, so that a null source with a
    // positive length no longer reallocates and copies the backing array
    // only to append nothing.
    if (ch == null || length <= 0) {
        return;
    }
    if (this.length + length > this.ch.length) {
        // grow to twice the size, and by no less than length + DEFAULT_SIZE
        int newLength = this.ch.length * 2;
        if (newLength < this.ch.length + length + DEFAULT_SIZE) {
            newLength = this.ch.length + length + DEFAULT_SIZE;
        }
        char[] newch = new char[newLength];
        System.arraycopy(this.ch, 0, newch, 0, this.length);
        this.ch = newch;
    }
    System.arraycopy(ch, offset, this.ch, this.length, length);
    this.length += length;
} // append(char[],int,int)
/**
 * Appends another buffer to this buffer by delegating to
 * {@code append(char[], int, int)} with that buffer's array, offset
 * and length.
 *
 * @param s another buffer
 */
public void append(XMLString s) {
    final char[] chars = s.ch;
    final int from = s.offset;
    final int size = s.length;
    append(chars, from, size);
} // append(XMLString)
} // class XMLString