8270646: Improved scanning of XML entities
Reviewed-by: naoto, lancea, mschoene, rhalade
This commit is contained in:
parent
fa47c368d4
commit
b02ea6dc3c
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
@ -54,7 +54,7 @@ import com.sun.org.apache.xerces.internal.xni.XNIException;
|
||||
* @author Andy Clark, IBM
|
||||
* @author Arnaud Le Hors, IBM
|
||||
* @author Eric Ye, IBM
|
||||
*
|
||||
* @LastModified: Aug 2021
|
||||
*/
|
||||
public class XML11DocumentScannerImpl
|
||||
extends XMLDocumentScannerImpl {
|
||||
@ -278,16 +278,6 @@ public class XML11DocumentScannerImpl
|
||||
+ fStringBuffer.toString() + "\"");
|
||||
}
|
||||
}
|
||||
// note that none of these characters should ever get through
|
||||
// XML11EntityScanner. Not sure why
|
||||
// this check was originally necessary. - NG
|
||||
else if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
|
||||
fEntityScanner.scanChar(null);
|
||||
fStringBuffer.append(' ');
|
||||
if (entityDepth == fEntityDepth) {
|
||||
fStringBuffer2.append('\n');
|
||||
}
|
||||
}
|
||||
else if (c != -1 && XMLChar.isHighSurrogate(c)) {
|
||||
fStringBuffer3.clear();
|
||||
if (scanSurrogates(fStringBuffer3)) {
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
package com.sun.org.apache.xerces.internal.impl;
|
||||
|
||||
import static com.sun.org.apache.xerces.internal.impl.Constants.XML_VERSION_1_1;
|
||||
import com.sun.org.apache.xerces.internal.impl.XMLScanner.NameType;
|
||||
import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
|
||||
import com.sun.org.apache.xerces.internal.util.XML11Char;
|
||||
@ -40,7 +41,7 @@ import java.io.IOException;
|
||||
* @author Michael Glavassevich, IBM
|
||||
* @author Neil Graham, IBM
|
||||
*
|
||||
* @LastModified: Apr 2021
|
||||
* @LastModified: Aug 2021
|
||||
*/
|
||||
|
||||
public class XML11EntityScanner
|
||||
@ -116,7 +117,7 @@ public class XML11EntityScanner
|
||||
load(1, false, false);
|
||||
offset = 0;
|
||||
}
|
||||
if (c == '\r' && external) {
|
||||
if (c == '\r' && external && fCurrentEntity.position < fCurrentEntity.count) {
|
||||
int cc = fCurrentEntity.ch[fCurrentEntity.position++];
|
||||
if (cc != '\n' && cc != 0x85) {
|
||||
fCurrentEntity.position--;
|
||||
@ -761,71 +762,12 @@ public class XML11EntityScanner
|
||||
}
|
||||
|
||||
// normalize newlines
|
||||
int offset = fCurrentEntity.position;
|
||||
int c = fCurrentEntity.ch[offset];
|
||||
int newlines = 0;
|
||||
boolean counted = false;
|
||||
boolean external = fCurrentEntity.isExternal();
|
||||
if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
|
||||
do {
|
||||
c = fCurrentEntity.ch[fCurrentEntity.position++];
|
||||
if ((c == '\r' ) && external) {
|
||||
newlines++;
|
||||
fCurrentEntity.lineNumber++;
|
||||
fCurrentEntity.columnNumber = 1;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
checkEntityLimit(null, fCurrentEntity, offset, newlines);
|
||||
offset = 0;
|
||||
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
|
||||
fCurrentEntity.position = newlines;
|
||||
fCurrentEntity.startPosition = newlines;
|
||||
if (load(newlines, false, true)) {
|
||||
counted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
int cc = fCurrentEntity.ch[fCurrentEntity.position];
|
||||
if (cc == '\n' || cc == 0x85) {
|
||||
fCurrentEntity.position++;
|
||||
offset++;
|
||||
}
|
||||
/*** NEWLINE NORMALIZATION ***/
|
||||
else {
|
||||
newlines++;
|
||||
}
|
||||
}
|
||||
else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) {
|
||||
newlines++;
|
||||
fCurrentEntity.lineNumber++;
|
||||
fCurrentEntity.columnNumber = 1;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
checkEntityLimit(null, fCurrentEntity, offset, newlines);
|
||||
offset = 0;
|
||||
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
|
||||
fCurrentEntity.position = newlines;
|
||||
fCurrentEntity.startPosition = newlines;
|
||||
if (load(newlines, false, true)) {
|
||||
counted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
fCurrentEntity.position--;
|
||||
break;
|
||||
}
|
||||
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
|
||||
for (int i = offset; i < fCurrentEntity.position; i++) {
|
||||
fCurrentEntity.ch[i] = '\n';
|
||||
}
|
||||
int length = fCurrentEntity.position - offset;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
|
||||
checkEntityLimit(null, fCurrentEntity, offset, length);
|
||||
content.setValues(fCurrentEntity.ch, offset, length);
|
||||
return -1;
|
||||
}
|
||||
if (normalizeNewlines(XML_VERSION_1_1, content, false, false, null)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int c;
|
||||
boolean external = fCurrentEntity.isExternal();
|
||||
// inner loop, scanning for content
|
||||
if (external) {
|
||||
while (fCurrentEntity.position < fCurrentEntity.count) {
|
||||
@ -913,65 +855,12 @@ public class XML11EntityScanner
|
||||
}
|
||||
|
||||
// normalize newlines
|
||||
int offset = fCurrentEntity.position;
|
||||
int c = fCurrentEntity.ch[offset];
|
||||
int newlines = 0;
|
||||
boolean external = fCurrentEntity.isExternal();
|
||||
if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
|
||||
do {
|
||||
c = fCurrentEntity.ch[fCurrentEntity.position++];
|
||||
if ((c == '\r' ) && external) {
|
||||
newlines++;
|
||||
fCurrentEntity.lineNumber++;
|
||||
fCurrentEntity.columnNumber = 1;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
offset = 0;
|
||||
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
|
||||
fCurrentEntity.position = newlines;
|
||||
fCurrentEntity.startPosition = newlines;
|
||||
if (load(newlines, false, true)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
int cc = fCurrentEntity.ch[fCurrentEntity.position];
|
||||
if (cc == '\n' || cc == 0x85) {
|
||||
fCurrentEntity.position++;
|
||||
offset++;
|
||||
}
|
||||
/*** NEWLINE NORMALIZATION ***/
|
||||
else {
|
||||
newlines++;
|
||||
}
|
||||
}
|
||||
else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) {
|
||||
newlines++;
|
||||
fCurrentEntity.lineNumber++;
|
||||
fCurrentEntity.columnNumber = 1;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
offset = 0;
|
||||
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
|
||||
fCurrentEntity.position = newlines;
|
||||
fCurrentEntity.startPosition = newlines;
|
||||
if (load(newlines, false, true)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
fCurrentEntity.position--;
|
||||
break;
|
||||
}
|
||||
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
|
||||
for (int i = offset; i < fCurrentEntity.position; i++) {
|
||||
fCurrentEntity.ch[i] = '\n';
|
||||
}
|
||||
int length = fCurrentEntity.position - offset;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
|
||||
content.setValues(fCurrentEntity.ch, offset, length);
|
||||
return -1;
|
||||
}
|
||||
if (normalizeNewlines(XML_VERSION_1_1, content, false, true, null)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int c;
|
||||
boolean external = fCurrentEntity.isExternal();
|
||||
// scan literal value
|
||||
if (external) {
|
||||
while (fCurrentEntity.position < fCurrentEntity.count) {
|
||||
@ -1093,66 +982,11 @@ public class XML11EntityScanner
|
||||
}
|
||||
|
||||
// normalize newlines
|
||||
int offset = fCurrentEntity.position;
|
||||
int c = fCurrentEntity.ch[offset];
|
||||
int newlines = 0;
|
||||
if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
|
||||
do {
|
||||
c = fCurrentEntity.ch[fCurrentEntity.position++];
|
||||
if ((c == '\r' ) && external) {
|
||||
newlines++;
|
||||
fCurrentEntity.lineNumber++;
|
||||
fCurrentEntity.columnNumber = 1;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
offset = 0;
|
||||
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
|
||||
fCurrentEntity.position = newlines;
|
||||
fCurrentEntity.startPosition = newlines;
|
||||
if (load(newlines, false, true)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
int cc = fCurrentEntity.ch[fCurrentEntity.position];
|
||||
if (cc == '\n' || cc == 0x85) {
|
||||
fCurrentEntity.position++;
|
||||
offset++;
|
||||
}
|
||||
/*** NEWLINE NORMALIZATION ***/
|
||||
else {
|
||||
newlines++;
|
||||
}
|
||||
}
|
||||
else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) {
|
||||
newlines++;
|
||||
fCurrentEntity.lineNumber++;
|
||||
fCurrentEntity.columnNumber = 1;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
offset = 0;
|
||||
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
|
||||
fCurrentEntity.position = newlines;
|
||||
fCurrentEntity.startPosition = newlines;
|
||||
fCurrentEntity.count = newlines;
|
||||
if (load(newlines, false, true)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
fCurrentEntity.position--;
|
||||
break;
|
||||
}
|
||||
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
|
||||
for (int i = offset; i < fCurrentEntity.position; i++) {
|
||||
fCurrentEntity.ch[i] = '\n';
|
||||
}
|
||||
int length = fCurrentEntity.position - offset;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
|
||||
checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
|
||||
buffer.append(fCurrentEntity.ch, offset, length);
|
||||
return true;
|
||||
}
|
||||
if (normalizeNewlines(XML_VERSION_1_1, buffer, true, false, NameType.COMMENT)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
int c;
|
||||
// iterate over buffer looking for delimiter
|
||||
OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
|
||||
c = fCurrentEntity.ch[fCurrentEntity.position++];
|
||||
@ -1256,22 +1090,6 @@ public class XML11EntityScanner
|
||||
checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
|
||||
return true;
|
||||
}
|
||||
else if (c == '\n' && (cc == '\r' ) && fCurrentEntity.isExternal()) {
|
||||
// handle newlines
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
invokeListeners(1);
|
||||
fCurrentEntity.ch[0] = (char)cc;
|
||||
load(1, false, false);
|
||||
}
|
||||
int ccc = fCurrentEntity.ch[++fCurrentEntity.position];
|
||||
if (ccc == '\n' || ccc == 0x85) {
|
||||
fCurrentEntity.position++;
|
||||
}
|
||||
fCurrentEntity.lineNumber++;
|
||||
fCurrentEntity.columnNumber = 1;
|
||||
checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
|
||||
return true;
|
||||
}
|
||||
|
||||
// character was not skipped
|
||||
return false;
|
||||
|
@ -21,6 +21,8 @@
|
||||
|
||||
package com.sun.org.apache.xerces.internal.impl;
|
||||
|
||||
import static com.sun.org.apache.xerces.internal.impl.Constants.XML_VERSION_1_0;
|
||||
import static com.sun.org.apache.xerces.internal.impl.Constants.XML_VERSION_1_1;
|
||||
import com.sun.org.apache.xerces.internal.impl.XMLScanner.NameType;
|
||||
import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
|
||||
import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
|
||||
@ -55,7 +57,7 @@ import java.util.Locale;
|
||||
* @author Arnaud Le Hors, IBM
|
||||
* @author K.Venugopal Sun Microsystems
|
||||
*
|
||||
* @LastModified: Apr 2021
|
||||
* @LastModified: Aug 2021
|
||||
*/
|
||||
public class XMLEntityScanner implements XMLLocator {
|
||||
|
||||
@ -149,6 +151,15 @@ public class XMLEntityScanner implements XMLLocator {
|
||||
// indicates that the operation is for detecting XML version
|
||||
boolean detectingVersion = false;
|
||||
|
||||
// offset of the current cursor position
|
||||
int offset = 0;
|
||||
|
||||
// number of newlines in the current process
|
||||
int newlines = 0;
|
||||
|
||||
// indicating whether the content has been counted towards limit
|
||||
boolean counted = false;
|
||||
|
||||
//
|
||||
// Constructors
|
||||
//
|
||||
@ -553,7 +564,7 @@ public class XMLEntityScanner implements XMLLocator {
|
||||
}
|
||||
|
||||
// scan character
|
||||
int offset = fCurrentEntity.position;
|
||||
offset = fCurrentEntity.position;
|
||||
int c = fCurrentEntity.ch[fCurrentEntity.position++];
|
||||
if (c == '\n' || (c == '\r' && isExternal)) {
|
||||
fCurrentEntity.lineNumber++;
|
||||
@ -561,10 +572,10 @@ public class XMLEntityScanner implements XMLLocator {
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
invokeListeners(1);
|
||||
fCurrentEntity.ch[0] = (char)c;
|
||||
load(1, false, false);
|
||||
load(1, true, false);
|
||||
offset = 0;
|
||||
}
|
||||
if (c == '\r' && isExternal) {
|
||||
if (c == '\r' && isExternal && fCurrentEntity.position < fCurrentEntity.count) {
|
||||
if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
|
||||
fCurrentEntity.position--;
|
||||
}
|
||||
@ -614,7 +625,7 @@ public class XMLEntityScanner implements XMLLocator {
|
||||
}
|
||||
|
||||
// scan nmtoken
|
||||
int offset = fCurrentEntity.position;
|
||||
offset = fCurrentEntity.position;
|
||||
boolean vc = false;
|
||||
char c;
|
||||
while (true){
|
||||
@ -695,7 +706,7 @@ public class XMLEntityScanner implements XMLLocator {
|
||||
}
|
||||
|
||||
// scan name
|
||||
int offset = fCurrentEntity.position;
|
||||
offset = fCurrentEntity.position;
|
||||
int length;
|
||||
if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
|
||||
if (++fCurrentEntity.position == fCurrentEntity.count) {
|
||||
@ -788,7 +799,7 @@ public class XMLEntityScanner implements XMLLocator {
|
||||
}
|
||||
|
||||
// scan qualified name
|
||||
int offset = fCurrentEntity.position;
|
||||
offset = fCurrentEntity.position;
|
||||
|
||||
//making a check if the specified character is a valid name start character
|
||||
//as defined by production [5] in the XML 1.0 specification.
|
||||
@ -1043,81 +1054,11 @@ public class XMLEntityScanner implements XMLLocator {
|
||||
}
|
||||
|
||||
// normalize newlines
|
||||
int offset = fCurrentEntity.position;
|
||||
int c = fCurrentEntity.ch[offset];
|
||||
int newlines = 0;
|
||||
boolean counted = false;
|
||||
if (c == '\n' || (c == '\r' && isExternal)) {
|
||||
if (DEBUG_BUFFER) {
|
||||
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
|
||||
print();
|
||||
System.out.println();
|
||||
}
|
||||
do {
|
||||
c = fCurrentEntity.ch[fCurrentEntity.position++];
|
||||
if (c == '\r' && isExternal) {
|
||||
newlines++;
|
||||
fCurrentEntity.lineNumber++;
|
||||
fCurrentEntity.columnNumber = 1;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
checkEntityLimit(null, fCurrentEntity, offset, newlines);
|
||||
offset = 0;
|
||||
fCurrentEntity.position = newlines;
|
||||
if (load(newlines, false, true)) {
|
||||
counted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
|
||||
fCurrentEntity.position++;
|
||||
offset++;
|
||||
}
|
||||
/*** NEWLINE NORMALIZATION ***/
|
||||
else {
|
||||
newlines++;
|
||||
}
|
||||
} else if (c == '\n') {
|
||||
newlines++;
|
||||
fCurrentEntity.lineNumber++;
|
||||
fCurrentEntity.columnNumber = 1;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
checkEntityLimit(null, fCurrentEntity, offset, newlines);
|
||||
offset = 0;
|
||||
fCurrentEntity.position = newlines;
|
||||
if (load(newlines, false, true)) {
|
||||
counted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
fCurrentEntity.position--;
|
||||
break;
|
||||
}
|
||||
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
|
||||
for (int i = offset; i < fCurrentEntity.position; i++) {
|
||||
fCurrentEntity.ch[i] = '\n';
|
||||
}
|
||||
int length = fCurrentEntity.position - offset;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
|
||||
checkEntityLimit(null, fCurrentEntity, offset, length);
|
||||
//CHANGED: dont replace the value.. append to the buffer. This gives control to the callee
|
||||
//on buffering the data..
|
||||
content.setValues(fCurrentEntity.ch, offset, length);
|
||||
//content.append(fCurrentEntity.ch, offset, length);
|
||||
if (DEBUG_BUFFER) {
|
||||
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
|
||||
print();
|
||||
System.out.println();
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
if (DEBUG_BUFFER) {
|
||||
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
|
||||
print();
|
||||
System.out.println();
|
||||
}
|
||||
if (normalizeNewlines(XML_VERSION_1_0, content, false, false, null)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int c;
|
||||
while (fCurrentEntity.position < fCurrentEntity.count) {
|
||||
c = fCurrentEntity.ch[fCurrentEntity.position++];
|
||||
if (!XMLChar.isContent(c)) {
|
||||
@ -1202,85 +1143,14 @@ public class XMLEntityScanner implements XMLLocator {
|
||||
}
|
||||
|
||||
// normalize newlines
|
||||
int offset = fCurrentEntity.position;
|
||||
int c = fCurrentEntity.ch[offset];
|
||||
int newlines = 0;
|
||||
if(whiteSpaceInfoNeeded)
|
||||
whiteSpaceLen=0;
|
||||
if (c == '\n' || (c == '\r' && isExternal)) {
|
||||
if (DEBUG_BUFFER) {
|
||||
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
|
||||
print();
|
||||
System.out.println();
|
||||
}
|
||||
do {
|
||||
c = fCurrentEntity.ch[fCurrentEntity.position++];
|
||||
if (c == '\r' && isExternal) {
|
||||
newlines++;
|
||||
fCurrentEntity.lineNumber++;
|
||||
fCurrentEntity.columnNumber = 1;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
offset = 0;
|
||||
fCurrentEntity.position = newlines;
|
||||
if (load(newlines, false, true)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
|
||||
fCurrentEntity.position++;
|
||||
offset++;
|
||||
}
|
||||
/*** NEWLINE NORMALIZATION ***/
|
||||
else {
|
||||
newlines++;
|
||||
}
|
||||
/***/
|
||||
} else if (c == '\n') {
|
||||
newlines++;
|
||||
fCurrentEntity.lineNumber++;
|
||||
fCurrentEntity.columnNumber = 1;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
offset = 0;
|
||||
fCurrentEntity.position = newlines;
|
||||
if (load(newlines, false, true)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
/*** NEWLINE NORMALIZATION ***
|
||||
* if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
|
||||
* && external) {
|
||||
* fCurrentEntity.position++;
|
||||
* offset++;
|
||||
* }
|
||||
* /***/
|
||||
} else {
|
||||
fCurrentEntity.position--;
|
||||
break;
|
||||
}
|
||||
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
|
||||
int i=0;
|
||||
for ( i = offset; i < fCurrentEntity.position; i++) {
|
||||
fCurrentEntity.ch[i] = '\n';
|
||||
storeWhiteSpace(i);
|
||||
}
|
||||
|
||||
int length = fCurrentEntity.position - offset;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
|
||||
content.setValues(fCurrentEntity.ch, offset, length);
|
||||
if (DEBUG_BUFFER) {
|
||||
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
|
||||
print();
|
||||
System.out.println();
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
if (DEBUG_BUFFER) {
|
||||
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
|
||||
print();
|
||||
System.out.println();
|
||||
}
|
||||
if (normalizeNewlines(XML_VERSION_1_0, content, false, true, null)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int c;
|
||||
// scan literal value
|
||||
for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
|
||||
c = fCurrentEntity.ch[fCurrentEntity.position];
|
||||
@ -1331,7 +1201,7 @@ public class XMLEntityScanner implements XMLLocator {
|
||||
*
|
||||
* @param whiteSpacePos position of a whitespace in the scanner entity buffer
|
||||
*/
|
||||
private void storeWhiteSpace(int whiteSpacePos) {
|
||||
void storeWhiteSpace(int whiteSpacePos) {
|
||||
if (whiteSpaceLen >= whiteSpaceLookup.length) {
|
||||
int [] tmp = new int[whiteSpaceLookup.length + 100];
|
||||
System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length);
|
||||
@ -1415,75 +1285,11 @@ public class XMLEntityScanner implements XMLLocator {
|
||||
return false;
|
||||
}
|
||||
|
||||
// normalize newlines
|
||||
int offset = fCurrentEntity.position;
|
||||
int c = fCurrentEntity.ch[offset];
|
||||
int newlines = 0;
|
||||
if (c == '\n' || (c == '\r' && isExternal)) {
|
||||
if (DEBUG_BUFFER) {
|
||||
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
|
||||
print();
|
||||
System.out.println();
|
||||
}
|
||||
do {
|
||||
c = fCurrentEntity.ch[fCurrentEntity.position++];
|
||||
if (c == '\r' && isExternal) {
|
||||
newlines++;
|
||||
fCurrentEntity.lineNumber++;
|
||||
fCurrentEntity.columnNumber = 1;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
offset = 0;
|
||||
fCurrentEntity.position = newlines;
|
||||
if (load(newlines, false, true)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
|
||||
fCurrentEntity.position++;
|
||||
offset++;
|
||||
}
|
||||
/*** NEWLINE NORMALIZATION ***/
|
||||
else {
|
||||
newlines++;
|
||||
}
|
||||
} else if (c == '\n') {
|
||||
newlines++;
|
||||
fCurrentEntity.lineNumber++;
|
||||
fCurrentEntity.columnNumber = 1;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
offset = 0;
|
||||
fCurrentEntity.position = newlines;
|
||||
fCurrentEntity.count = newlines;
|
||||
if (load(newlines, false, true)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
fCurrentEntity.position--;
|
||||
break;
|
||||
}
|
||||
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
|
||||
for (int i = offset; i < fCurrentEntity.position; i++) {
|
||||
fCurrentEntity.ch[i] = '\n';
|
||||
}
|
||||
int length = fCurrentEntity.position - offset;
|
||||
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
|
||||
checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
|
||||
buffer.append(fCurrentEntity.ch, offset, length);
|
||||
if (DEBUG_BUFFER) {
|
||||
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
|
||||
print();
|
||||
System.out.println();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (DEBUG_BUFFER) {
|
||||
System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
|
||||
print();
|
||||
System.out.println();
|
||||
}
|
||||
if (normalizeNewlines(XML_VERSION_1_0, buffer, true, false, NameType.COMMENT)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
int c;
|
||||
// iterate over buffer looking for delimiter
|
||||
OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
|
||||
c = fCurrentEntity.ch[fCurrentEntity.position++];
|
||||
@ -1570,7 +1376,7 @@ public class XMLEntityScanner implements XMLLocator {
|
||||
}
|
||||
|
||||
// skip character
|
||||
int offset = fCurrentEntity.position;
|
||||
offset = fCurrentEntity.position;
|
||||
int cc = fCurrentEntity.ch[fCurrentEntity.position];
|
||||
if (cc == c) {
|
||||
fCurrentEntity.position++;
|
||||
@ -1587,26 +1393,6 @@ public class XMLEntityScanner implements XMLLocator {
|
||||
}
|
||||
checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
|
||||
return true;
|
||||
} else if (c == '\n' && cc == '\r' && isExternal) {
|
||||
// handle newlines
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
invokeListeners(1);
|
||||
fCurrentEntity.ch[0] = (char)cc;
|
||||
load(1, false, false);
|
||||
}
|
||||
fCurrentEntity.position++;
|
||||
if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
|
||||
fCurrentEntity.position++;
|
||||
}
|
||||
fCurrentEntity.lineNumber++;
|
||||
fCurrentEntity.columnNumber = 1;
|
||||
if (DEBUG_BUFFER) {
|
||||
System.out.print(")skipChar, '"+(char)c+"': ");
|
||||
print();
|
||||
System.out.println(" -> true");
|
||||
}
|
||||
checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
|
||||
return true;
|
||||
}
|
||||
|
||||
// character was not skipped
|
||||
@ -1659,7 +1445,7 @@ public class XMLEntityScanner implements XMLLocator {
|
||||
|
||||
// skip spaces
|
||||
int c = fCurrentEntity.ch[fCurrentEntity.position];
|
||||
int offset = fCurrentEntity.position - 1;
|
||||
offset = fCurrentEntity.position - 1;
|
||||
if (XMLChar.isSpace(c)) {
|
||||
do {
|
||||
boolean entityChanged = false;
|
||||
@ -2332,5 +2118,86 @@ public class XMLEntityScanner implements XMLLocator {
|
||||
|
||||
} // skipDeclSpaces():boolean
|
||||
|
||||
/**
|
||||
* Normalizes the run of newline characters that starts at the current cursor
|
||||
* position of the current entity buffer. Following XML section 2.11
|
||||
* End-of-Line Handling, '\n', '\r' and '\r\n' are each rewritten to a single
|
||||
* '\n'; when {@code version} is XML 1.1, 0x85, 0x2028 and '\r' followed by
|
||||
* 0x85 are treated as line ends as well. The normalized positions may
|
||||
* also be recorded through {@code storeWhiteSpace} for later processing, for
|
||||
* example as specified in 3.3.3 Attribute-Value Normalization.
|
||||
*
|
||||
* <p>The method resets and then updates the scanner fields {@code offset},
|
||||
* {@code newlines} and {@code counted}, and advances the line/column numbers
|
||||
* of the current entity. If the character at the cursor is not a newline,
|
||||
* nothing is consumed and {@code false} is returned.
|
||||
*
|
||||
* @implNote this method is not limited to external parsed entities as
|
||||
* section 2.11 requires. It handles all cases where newlines need to be processed.
|
||||
*
|
||||
* @param version the XML version constant; compared against
|
||||
* {@code XML_VERSION_1_1} to enable the XML 1.1 line-end characters
|
||||
* @param buffer the current content buffer; only written to when this method
|
||||
* returns {@code true}
|
||||
* @param append a flag indicating whether to append to the buffer
|
||||
* ({@code true}) or to replace its values ({@code false})
|
||||
* @param storeWS a flag indicating whether the whitespaces need to be stored
|
||||
* for later processing
|
||||
* @param nt the type of the entity, passed through to {@code checkEntityLimit}
|
||||
* @return true if, after the newlines were consumed, the cursor is on the last
|
||||
* character currently held in the entity buffer (position == count - 1), in
|
||||
* which case the normalized newlines have been stored in {@code buffer};
|
||||
* false otherwise.
|
||||
* @throws IOException if more of the entity has to be loaded and that fails
|
||||
* (presumably raised by {@code load} - confirm)
|
||||
*/
|
||||
protected boolean normalizeNewlines(short version, XMLString buffer, boolean append,
|
||||
boolean storeWS, NameType nt)
|
||||
throws IOException {
|
||||
// normalize newlines
|
||||
// remember where the run of newlines starts in the entity buffer
|
||||
offset = fCurrentEntity.position;
|
||||
int c = fCurrentEntity.ch[offset];
|
||||
newlines = 0;
|
||||
// how this information is used is determined by the caller of this method
|
||||
counted = false;
|
||||
// only enter the loop when the cursor is on a newline character
|
||||
if ((c == '\n' || c == '\r') ||
|
||||
(version == XML_VERSION_1_1 && (c == 0x85 || c == 0x2028))) {
|
||||
do {
|
||||
c = fCurrentEntity.ch[fCurrentEntity.position++];
|
||||
if ((c == '\n' || c == '\r') ||
|
||||
(version == XML_VERSION_1_1 && (c == 0x85 || c == 0x2028))) {
|
||||
newlines++;
|
||||
fCurrentEntity.lineNumber++;
|
||||
fCurrentEntity.columnNumber = 1;
|
||||
// buffer exhausted: account for what was scanned so far, then restart
|
||||
// at the front of the buffer, keeping room for the newlines seen so far
|
||||
if (fCurrentEntity.position == fCurrentEntity.count) {
|
||||
checkEntityLimit(nt, fCurrentEntity, offset, newlines);
|
||||
offset = 0;
|
||||
fCurrentEntity.position = newlines;
|
||||
// when load(...) reports true the limit check above has already been
|
||||
// done for this run, which is what 'counted' signals to the caller
|
||||
if (load(newlines, false, true)) {
|
||||
counted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (c == '\r') {
|
||||
// look at the character following the carriage return
|
||||
int cc = fCurrentEntity.ch[fCurrentEntity.position];
|
||||
// a two-character line end collapses to one '\n': skip its second character
|
||||
if (cc == '\n' || (version == XML_VERSION_1_1 && cc == 0x85)) {
|
||||
fCurrentEntity.position++;
|
||||
offset++;
|
||||
}
|
||||
/*** NEWLINE NORMALIZATION ***/
|
||||
// NOTE(review): a lone '\r' increments 'newlines' a second time here; this
|
||||
// mirrors the per-method loops this helper replaces - confirm it is intended
|
||||
else {
|
||||
newlines++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// not a newline: un-read the character and stop
|
||||
fCurrentEntity.position--;
|
||||
break;
|
||||
}
|
||||
} while (fCurrentEntity.position < fCurrentEntity.count - 1);
|
||||
|
||||
// overwrite everything between offset and the cursor with '\n'
|
||||
for (int i = offset; i < fCurrentEntity.position; i++) {
|
||||
fCurrentEntity.ch[i] = '\n';
|
||||
if (storeWS) {
|
||||
storeWhiteSpace(i);
|
||||
}
|
||||
}
|
||||
|
||||
int length = fCurrentEntity.position - offset;
|
||||
// the cursor sits on the last buffered character: hand the normalized
|
||||
// newlines to the caller's buffer and report it via the return value
|
||||
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
|
||||
checkEntityLimit(nt, fCurrentEntity, offset, length);
|
||||
if (append) {
|
||||
buffer.append(fCurrentEntity.ch, offset, length);
|
||||
} else {
|
||||
buffer.setValues(fCurrentEntity.ch, offset, length);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} // class XMLEntityScanner
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
@ -67,7 +67,7 @@ import com.sun.xml.internal.stream.Entity;
|
||||
* @author Eric Ye, IBM
|
||||
* @author K.Venugopal SUN Microsystems
|
||||
* @author Sunitha Reddy, SUN Microsystems
|
||||
* @LastModified: Feb 2020
|
||||
* @LastModified: Aug 2021
|
||||
*/
|
||||
public abstract class XMLScanner
|
||||
implements XMLComponent {
|
||||
@ -956,12 +956,6 @@ public abstract class XMLScanner
|
||||
System.out.println("** valueF: \""
|
||||
+ stringBuffer.toString() + "\"");
|
||||
}
|
||||
} else if (c == '\n' || c == '\r') {
|
||||
fEntityScanner.scanChar(null);
|
||||
stringBuffer.append(' ');
|
||||
if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
|
||||
fStringBuffer2.append('\n');
|
||||
}
|
||||
} else if (c != -1 && XMLChar.isHighSurrogate(c)) {
|
||||
fStringBuffer3.clear();
|
||||
if (scanSurrogates(fStringBuffer3)) {
|
||||
|
@ -1,6 +1,5 @@
|
||||
/*
|
||||
* reserved comment block
|
||||
* DO NOT REMOVE OR ALTER!
|
||||
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
@ -41,19 +40,11 @@ import com.sun.org.apache.xerces.internal.xni.XMLString;
|
||||
*
|
||||
* @author Andy Clark, IBM
|
||||
* @author Eric Ye, IBM
|
||||
*
|
||||
* @LastModified: Aug 2021
|
||||
*/
|
||||
public class XMLStringBuffer
|
||||
extends XMLString {
|
||||
|
||||
//
|
||||
// Constants
|
||||
//
|
||||
|
||||
|
||||
/** Default buffer size (32). */
|
||||
public static final int DEFAULT_SIZE = 32;
|
||||
|
||||
//
|
||||
// Data
|
||||
//
|
||||
@ -112,79 +103,4 @@ extends XMLString {
|
||||
length = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* append
|
||||
*
|
||||
* @param c
|
||||
*/
|
||||
public void append(char c) {
|
||||
if(this.length + 1 > this.ch.length){
|
||||
int newLength = this.ch.length * 2 ;
|
||||
if(newLength < this.ch.length + DEFAULT_SIZE){
|
||||
newLength = this.ch.length + DEFAULT_SIZE;
|
||||
}
|
||||
char [] tmp = new char[newLength];
|
||||
System.arraycopy(this.ch, 0, tmp, 0, this.length);
|
||||
this.ch = tmp;
|
||||
}
|
||||
this.ch[this.length] = c ;
|
||||
this.length++;
|
||||
} // append(char)
|
||||
|
||||
/**
|
||||
* append
|
||||
*
|
||||
* @param s
|
||||
*/
|
||||
public void append(String s) {
|
||||
int length = s.length();
|
||||
if (this.length + length > this.ch.length) {
|
||||
int newLength = this.ch.length * 2 ;
|
||||
if(newLength < this.ch.length + length + DEFAULT_SIZE){
|
||||
newLength = this.ch.length + length+ DEFAULT_SIZE;
|
||||
}
|
||||
|
||||
char[] newch = new char[newLength];
|
||||
System.arraycopy(this.ch, 0, newch, 0, this.length);
|
||||
this.ch = newch;
|
||||
}
|
||||
s.getChars(0, length, this.ch, this.length);
|
||||
this.length += length;
|
||||
} // append(String)
|
||||
|
||||
/**
|
||||
* append
|
||||
*
|
||||
* @param ch
|
||||
* @param offset
|
||||
* @param length
|
||||
*/
|
||||
public void append(char[] ch, int offset, int length) {
|
||||
if (this.length + length > this.ch.length) {
|
||||
int newLength = this.ch.length * 2 ;
|
||||
if(newLength < this.ch.length + length + DEFAULT_SIZE){
|
||||
newLength = this.ch.length + length + DEFAULT_SIZE;
|
||||
}
|
||||
char[] newch = new char[newLength];
|
||||
System.arraycopy(this.ch, 0, newch, 0, this.length);
|
||||
this.ch = newch;
|
||||
}
|
||||
//making the code more robust as it would handle null or 0 length data,
|
||||
//add the data only when it contains some thing
|
||||
if(ch != null && length > 0){
|
||||
System.arraycopy(ch, offset, this.ch, this.length, length);
|
||||
this.length += length;
|
||||
}
|
||||
} // append(char[],int,int)
|
||||
|
||||
/**
|
||||
* append
|
||||
*
|
||||
* @param s
|
||||
*/
|
||||
public void append(XMLString s) {
|
||||
append(s.ch, s.offset, s.length);
|
||||
} // append(XMLString)
|
||||
|
||||
|
||||
} // class XMLStringBuffer
|
||||
|
@ -1,6 +1,5 @@
|
||||
/*
|
||||
* reserved comment block
|
||||
* DO NOT REMOVE OR ALTER!
|
||||
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
@ -42,9 +41,11 @@ package com.sun.org.apache.xerces.internal.xni;
|
||||
*
|
||||
* @author Eric Ye, IBM
|
||||
* @author Andy Clark, IBM
|
||||
*
|
||||
* @LastModified: Aug 2021
|
||||
*/
|
||||
public class XMLString {
|
||||
/** Default buffer size (32). */
|
||||
public static final int DEFAULT_SIZE = 32;
|
||||
|
||||
//
|
||||
// Data
|
||||
@ -189,4 +190,78 @@ public class XMLString {
|
||||
return length > 0 ? new String(ch, offset, length) : "";
|
||||
} // toString():String
|
||||
|
||||
/**
|
||||
* Appends a char to the buffer.
|
||||
*
|
||||
* @param c the char
|
||||
*/
|
||||
public void append(char c) {
|
||||
if(this.length + 1 > this.ch.length){
|
||||
int newLength = this.ch.length * 2 ;
|
||||
if(newLength < this.ch.length + DEFAULT_SIZE){
|
||||
newLength = this.ch.length + DEFAULT_SIZE;
|
||||
}
|
||||
char [] tmp = new char[newLength];
|
||||
System.arraycopy(this.ch, 0, tmp, 0, this.length);
|
||||
this.ch = tmp;
|
||||
}
|
||||
this.ch[this.length] = c ;
|
||||
this.length++;
|
||||
} // append(char)
|
||||
|
||||
/**
|
||||
* Appends a string to the buffer.
|
||||
*
|
||||
* @param s the string
|
||||
*/
|
||||
public void append(String s) {
|
||||
int length = s.length();
|
||||
if (this.length + length > this.ch.length) {
|
||||
int newLength = this.ch.length * 2 ;
|
||||
if(newLength < this.ch.length + length + DEFAULT_SIZE){
|
||||
newLength = this.ch.length + length+ DEFAULT_SIZE;
|
||||
}
|
||||
|
||||
char[] newch = new char[newLength];
|
||||
System.arraycopy(this.ch, 0, newch, 0, this.length);
|
||||
this.ch = newch;
|
||||
}
|
||||
s.getChars(0, length, this.ch, this.length);
|
||||
this.length += length;
|
||||
} // append(String)
|
||||
|
||||
/**
|
||||
* Appends a number of characters to the buffer.
|
||||
*
|
||||
* @param ch the char array
|
||||
* @param offset the offset
|
||||
* @param length the length
|
||||
*/
|
||||
public void append(char[] ch, int offset, int length) {
|
||||
if (this.length + length > this.ch.length) {
|
||||
int newLength = this.ch.length * 2 ;
|
||||
if(newLength < this.ch.length + length + DEFAULT_SIZE){
|
||||
newLength = this.ch.length + length + DEFAULT_SIZE;
|
||||
}
|
||||
char[] newch = new char[newLength];
|
||||
System.arraycopy(this.ch, 0, newch, 0, this.length);
|
||||
this.ch = newch;
|
||||
}
|
||||
//making the code more robust as it would handle null or 0 length data,
|
||||
//add the data only when it contains some thing
|
||||
if(ch != null && length > 0){
|
||||
System.arraycopy(ch, offset, this.ch, this.length, length);
|
||||
this.length += length;
|
||||
}
|
||||
} // append(char[],int,int)
|
||||
|
||||
/**
|
||||
* Appends another buffer to this buffer
|
||||
*
|
||||
* @param s another buffer
|
||||
*/
|
||||
public void append(XMLString s) {
|
||||
append(s.ch, s.offset, s.length);
|
||||
} // append(XMLString)
|
||||
|
||||
} // class XMLString
|
||||
|
Loading…
Reference in New Issue
Block a user