Merge
This commit is contained in:
commit
58f22a5049
@ -182,7 +182,8 @@ protected static final String PARSER_SETTINGS =
|
||||
EXTERNAL_GENERAL_ENTITIES,
|
||||
EXTERNAL_PARAMETER_ENTITIES,
|
||||
ALLOW_JAVA_ENCODINGS,
|
||||
WARN_ON_DUPLICATE_ENTITYDEF
|
||||
WARN_ON_DUPLICATE_ENTITYDEF,
|
||||
STANDARD_URI_CONFORMANT
|
||||
};
|
||||
|
||||
/** Feature defaults. */
|
||||
@ -192,6 +193,7 @@ protected static final String PARSER_SETTINGS =
|
||||
Boolean.TRUE,
|
||||
Boolean.TRUE,
|
||||
Boolean.FALSE,
|
||||
Boolean.FALSE
|
||||
};
|
||||
|
||||
/** Recognized properties. */
|
||||
|
@ -55,7 +55,7 @@ import com.sun.org.apache.xalan.internal.utils.ObjectFactory;
|
||||
final class CharInfo
|
||||
{
|
||||
/** Given a character, lookup a String to output (e.g. a decorated entity reference). */
|
||||
private HashMap m_charToString;
|
||||
private HashMap m_charToString = new HashMap();
|
||||
|
||||
/**
|
||||
* The name of the HTML entities file.
|
||||
@ -72,50 +72,42 @@ final class CharInfo
|
||||
"com.sun.org.apache.xml.internal.serializer.XMLEntities";
|
||||
|
||||
/** The horizontal tab character, which the parser should always normalize. */
|
||||
static final char S_HORIZONAL_TAB = 0x09;
|
||||
public static final char S_HORIZONAL_TAB = 0x09;
|
||||
|
||||
/** The linefeed character, which the parser should always normalize. */
|
||||
static final char S_LINEFEED = 0x0A;
|
||||
public static final char S_LINEFEED = 0x0A;
|
||||
|
||||
/** The carriage return character, which the parser should always normalize. */
|
||||
static final char S_CARRIAGERETURN = 0x0D;
|
||||
static final char S_SPACE = 0x20;
|
||||
static final char S_QUOTE = 0x22;
|
||||
static final char S_LT = 0x3C;
|
||||
static final char S_GT = 0x3E;
|
||||
static final char S_NEL = 0x85;
|
||||
static final char S_LINE_SEPARATOR = 0x2028;
|
||||
public static final char S_CARRIAGERETURN = 0x0D;
|
||||
|
||||
/** This flag is an optimization for HTML entities. It is false if entities
|
||||
* other than quot (34), amp (38), lt (60) and gt (62) are defined
|
||||
* in the range 0 to 127.
|
||||
* @xsl.usage internal
|
||||
*/
|
||||
boolean onlyQuotAmpLtGt;
|
||||
final boolean onlyQuotAmpLtGt;
|
||||
|
||||
/** Copy the first 0,1 ... ASCII_MAX values into an array */
|
||||
static final int ASCII_MAX = 128;
|
||||
private static final int ASCII_MAX = 128;
|
||||
|
||||
/** Array of values is faster access than a set of bits
|
||||
* to quickly check ASCII characters in attribute values,
|
||||
* the value is true if the character in an attribute value
|
||||
* should be mapped to a String.
|
||||
* to quickly check ASCII characters in attribute values.
|
||||
*/
|
||||
private final boolean[] shouldMapAttrChar_ASCII;
|
||||
private boolean[] isSpecialAttrASCII = new boolean[ASCII_MAX];
|
||||
|
||||
/** Array of values is faster access than a set of bits
|
||||
* to quickly check ASCII characters in text nodes,
|
||||
* the value is true if the character in a text node
|
||||
* should be mapped to a String.
|
||||
* to quickly check ASCII characters in text nodes.
|
||||
*/
|
||||
private final boolean[] shouldMapTextChar_ASCII;
|
||||
private boolean[] isSpecialTextASCII = new boolean[ASCII_MAX];
|
||||
|
||||
private boolean[] isCleanTextASCII = new boolean[ASCII_MAX];
|
||||
|
||||
/** An array of bits to record if the character is in the set.
|
||||
* Although information in this array is complete, the
|
||||
* isSpecialAttrASCII array is used first because access to its values
|
||||
* is common and faster.
|
||||
*/
|
||||
private final int array_of_bits[];
|
||||
private int array_of_bits[] = createEmptySetOfIntegers(65535);
|
||||
|
||||
|
||||
// 5 for 32 bit words, 6 for 64 bit words ...
|
||||
@ -146,38 +138,33 @@ final class CharInfo
|
||||
|
||||
|
||||
/**
|
||||
* A base constructor just to explicitly create the fields,
|
||||
* with the exception of m_charToString which is handled
|
||||
* by the constructor that delegates base construction to this one.
|
||||
* <p>
|
||||
* m_charToString is not created here only for performance reasons,
|
||||
* to avoid creating a Hashtable that will be replaced when
|
||||
* making a mutable copy, {@link #mutableCopyOf(CharInfo)}.
|
||||
* Constructor that reads in a resource file that describes the mapping of
|
||||
* characters to entity references.
|
||||
* This constructor is private, just to force the use
|
||||
* of the getCharInfo(entitiesResource) factory
|
||||
*
|
||||
* Resource files must be encoded in UTF-8 and can either be properties
|
||||
* files with a .properties extension assumed. Alternatively, they can
|
||||
* have the following form, with no particular extension assumed:
|
||||
*
|
||||
* <pre>
|
||||
* # First char # is a comment
|
||||
* Entity numericValue
|
||||
* quot 34
|
||||
* amp 38
|
||||
* </pre>
|
||||
*
|
||||
* @param entitiesResource Name of properties or resource file that should
|
||||
* be loaded, which describes that mapping of characters to entity
|
||||
* references.
|
||||
*/
|
||||
private CharInfo()
|
||||
private CharInfo(String entitiesResource, String method)
|
||||
{
|
||||
this.array_of_bits = createEmptySetOfIntegers(65535);
|
||||
this.firstWordNotUsed = 0;
|
||||
this.shouldMapAttrChar_ASCII = new boolean[ASCII_MAX];
|
||||
this.shouldMapTextChar_ASCII = new boolean[ASCII_MAX];
|
||||
this.m_charKey = new CharKey();
|
||||
|
||||
// Not set here, but in a constructor that uses this one
|
||||
// this.m_charToString = new Hashtable();
|
||||
|
||||
this.onlyQuotAmpLtGt = true;
|
||||
|
||||
|
||||
return;
|
||||
this(entitiesResource, method, false);
|
||||
}
|
||||
|
||||
private CharInfo(String entitiesResource, String method, boolean internal)
|
||||
{
|
||||
// call the default constructor to create the fields
|
||||
this();
|
||||
m_charToString = new HashMap();
|
||||
|
||||
ResourceBundle entities = null;
|
||||
boolean noExtraEntities = true;
|
||||
|
||||
@ -203,10 +190,12 @@ final class CharInfo
|
||||
String name = (String) keys.nextElement();
|
||||
String value = entities.getString(name);
|
||||
int code = Integer.parseInt(value);
|
||||
boolean extra = defineEntity(name, (char) code);
|
||||
if (extra)
|
||||
defineEntity(name, (char) code);
|
||||
if (extraEntity(code))
|
||||
noExtraEntities = false;
|
||||
}
|
||||
set(S_LINEFEED);
|
||||
set(S_CARRIAGERETURN);
|
||||
} else {
|
||||
InputStream is = null;
|
||||
|
||||
@ -290,8 +279,8 @@ final class CharInfo
|
||||
|
||||
int code = Integer.parseInt(value);
|
||||
|
||||
boolean extra = defineEntity(name, (char) code);
|
||||
if (extra)
|
||||
defineEntity(name, (char) code);
|
||||
if (extraEntity(code))
|
||||
noExtraEntities = false;
|
||||
}
|
||||
}
|
||||
@ -300,6 +289,8 @@ final class CharInfo
|
||||
}
|
||||
|
||||
is.close();
|
||||
set(S_LINEFEED);
|
||||
set(S_CARRIAGERETURN);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(
|
||||
Utils.messages.createMessage(
|
||||
@ -317,8 +308,31 @@ final class CharInfo
|
||||
}
|
||||
}
|
||||
|
||||
/* initialize the array isCleanTextASCII[] with a cache of values
|
||||
* for use by ToStream.character(char[], int , int)
|
||||
* and the array isSpecialTextASCII[] with the opposite values
|
||||
* (all in the name of performance!)
|
||||
*/
|
||||
for (int ch = 0; ch <ASCII_MAX; ch++)
|
||||
if((((0x20 <= ch || (0x0A == ch || 0x0D == ch || 0x09 == ch)))
|
||||
&& (!get(ch))) || ('"' == ch))
|
||||
{
|
||||
isCleanTextASCII[ch] = true;
|
||||
isSpecialTextASCII[ch] = false;
|
||||
}
|
||||
else {
|
||||
isCleanTextASCII[ch] = false;
|
||||
isSpecialTextASCII[ch] = true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
onlyQuotAmpLtGt = noExtraEntities;
|
||||
|
||||
// initialize the array with a cache of the BitSet values
|
||||
for (int i=0; i<ASCII_MAX; i++)
|
||||
isSpecialAttrASCII[i] = get(i);
|
||||
|
||||
/* Now that we've used get(ch) just above to initialize the
|
||||
* two arrays we will change by adding a tab to the set of
|
||||
* special chars for XML (but not HTML!).
|
||||
@ -330,18 +344,7 @@ final class CharInfo
|
||||
*/
|
||||
if (Method.XML.equals(method))
|
||||
{
|
||||
// We choose not to escape the quotation mark as " in text nodes
|
||||
shouldMapTextChar_ASCII[S_QUOTE] = false;
|
||||
}
|
||||
|
||||
if (Method.HTML.equals(method)) {
|
||||
// The XSLT 1.0 recommendation says
|
||||
// "The html output method should not escape < characters occurring in attribute values."
|
||||
// So we don't escape '<' in an attribute for HTML
|
||||
shouldMapAttrChar_ASCII['<'] = false;
|
||||
|
||||
// We choose not to escape the quotation mark as " in text nodes.
|
||||
shouldMapTextChar_ASCII[S_QUOTE] = false;
|
||||
isSpecialAttrASCII[S_HORIZONAL_TAB] = true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -350,38 +353,22 @@ final class CharInfo
|
||||
* supplied. Nothing happens if the character reference is already defined.
|
||||
* <p>Unlike internal entities, character references are a string to single
|
||||
* character mapping. They are used to map non-ASCII characters both on
|
||||
* parsing and printing, primarily for HTML documents. '&lt;' is an
|
||||
* parsing and printing, primarily for HTML documents. '<amp;' is an
|
||||
* example of a character reference.</p>
|
||||
*
|
||||
* @param name The entity's name
|
||||
* @param value The entity's value
|
||||
* @return true if the mapping is not one of:
|
||||
* <ul>
|
||||
* <li> '<' to "<"
|
||||
* <li> '>' to ">"
|
||||
* <li> '&' to "&"
|
||||
* <li> '"' to """
|
||||
* </ul>
|
||||
*/
|
||||
private boolean defineEntity(String name, char value)
|
||||
private void defineEntity(String name, char value)
|
||||
{
|
||||
StringBuilder sb = new StringBuilder("&");
|
||||
sb.append(name);
|
||||
sb.append(';');
|
||||
String entityString = sb.toString();
|
||||
|
||||
boolean extra = defineChar2StringMapping(entityString, value);
|
||||
return extra;
|
||||
defineChar2StringMapping(entityString, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* A utility object, just used to map characters to output Strings,
|
||||
* needed because a HashMap needs to map an object as a key, not a
|
||||
* Java primitive type, like a char, so this object gets around that
|
||||
* and it is reusable.
|
||||
*/
|
||||
private final CharKey m_charKey;
|
||||
|
||||
/**
|
||||
* Map a character to a String. For example given
|
||||
* the character '>' this method would return the fully decorated
|
||||
@ -413,21 +400,21 @@ final class CharInfo
|
||||
|
||||
/**
|
||||
* Tell if the character argument that is from
|
||||
* an attribute value has a mapping to a String.
|
||||
* an attribute value should have special treatment.
|
||||
*
|
||||
* @param value the value of a character that is in an attribute value
|
||||
* @return true if the character should have any special treatment,
|
||||
* such as when writing out attribute values,
|
||||
* such as when writing out entity references.
|
||||
* or entity references.
|
||||
* @xsl.usage internal
|
||||
*/
|
||||
final boolean shouldMapAttrChar(int value)
|
||||
final boolean isSpecialAttrChar(int value)
|
||||
{
|
||||
// for performance try the values in the boolean array first,
|
||||
// this is faster access than the BitSet for common ASCII values
|
||||
|
||||
if (value < ASCII_MAX)
|
||||
return shouldMapAttrChar_ASCII[value];
|
||||
return isSpecialAttrASCII[value];
|
||||
|
||||
// rather than java.util.BitSet, our private
|
||||
// implementation is faster (and less general).
|
||||
@ -436,27 +423,48 @@ final class CharInfo
|
||||
|
||||
/**
|
||||
* Tell if the character argument that is from a
|
||||
* text node has a mapping to a String, for example
|
||||
* to map '<' to "<".
|
||||
* text node should have special treatment.
|
||||
*
|
||||
* @param value the value of a character that is in a text node
|
||||
* @return true if the character has a mapping to a String,
|
||||
* such as when writing out entity references.
|
||||
* @return true if the character should have any special treatment,
|
||||
* such as when writing out attribute values,
|
||||
* or entity references.
|
||||
* @xsl.usage internal
|
||||
*/
|
||||
final boolean shouldMapTextChar(int value)
|
||||
final boolean isSpecialTextChar(int value)
|
||||
{
|
||||
// for performance try the values in the boolean array first,
|
||||
// this is faster access than the BitSet for common ASCII values
|
||||
|
||||
if (value < ASCII_MAX)
|
||||
return shouldMapTextChar_ASCII[value];
|
||||
return isSpecialTextASCII[value];
|
||||
|
||||
// rather than java.util.BitSet, our private
|
||||
// implementation is faster (and less general).
|
||||
return get(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used to determine if an ASCII character in
|
||||
* a text node (not an attribute value) is "clean".
|
||||
* @param value the character to check (0 to 127).
|
||||
* @return true if the character can go to the writer as-is
|
||||
* @xsl.usage internal
|
||||
*/
|
||||
final boolean isTextASCIIClean(int value)
|
||||
{
|
||||
return isCleanTextASCII[value];
|
||||
}
|
||||
|
||||
// In the future one might want to use the array directly and avoid
|
||||
// the method call, but I think the JIT already inlines this well enough
|
||||
// so don't do it (for now) - bjm
|
||||
// public final boolean[] getASCIIClean()
|
||||
// {
|
||||
// return isCleanTextASCII;
|
||||
// }
|
||||
|
||||
|
||||
private static CharInfo getCharInfoBasedOnPrivilege(
|
||||
final String entitiesFileName, final String method,
|
||||
final boolean internal){
|
||||
@ -491,17 +499,15 @@ final class CharInfo
|
||||
{
|
||||
CharInfo charInfo = (CharInfo) m_getCharInfoCache.get(entitiesFileName);
|
||||
if (charInfo != null) {
|
||||
return mutableCopyOf(charInfo);
|
||||
return charInfo;
|
||||
}
|
||||
|
||||
// try to load it internally - cache
|
||||
try {
|
||||
charInfo = getCharInfoBasedOnPrivilege(entitiesFileName,
|
||||
method, true);
|
||||
// Put the common copy of charInfo in the cache, but return
|
||||
// a copy of it.
|
||||
m_getCharInfoCache.put(entitiesFileName, charInfo);
|
||||
return mutableCopyOf(charInfo);
|
||||
return charInfo;
|
||||
} catch (Exception e) {}
|
||||
|
||||
// try to load it externally - do not cache
|
||||
@ -528,41 +534,7 @@ final class CharInfo
|
||||
method, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a mutable copy of the cached one.
|
||||
* @param charInfo The cached one.
|
||||
* @return
|
||||
*/
|
||||
private static CharInfo mutableCopyOf(CharInfo charInfo) {
|
||||
CharInfo copy = new CharInfo();
|
||||
|
||||
int max = charInfo.array_of_bits.length;
|
||||
System.arraycopy(charInfo.array_of_bits,0,copy.array_of_bits,0,max);
|
||||
|
||||
copy.firstWordNotUsed = charInfo.firstWordNotUsed;
|
||||
|
||||
max = charInfo.shouldMapAttrChar_ASCII.length;
|
||||
System.arraycopy(charInfo.shouldMapAttrChar_ASCII,0,copy.shouldMapAttrChar_ASCII,0,max);
|
||||
|
||||
max = charInfo.shouldMapTextChar_ASCII.length;
|
||||
System.arraycopy(charInfo.shouldMapTextChar_ASCII,0,copy.shouldMapTextChar_ASCII,0,max);
|
||||
|
||||
// utility field copy.m_charKey is already created in the default constructor
|
||||
|
||||
copy.m_charToString = (HashMap) charInfo.m_charToString.clone();
|
||||
|
||||
copy.onlyQuotAmpLtGt = charInfo.onlyQuotAmpLtGt;
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Table of user-specified char infos.
|
||||
* The table maps entity file names (the name of the
|
||||
* property file without the .properties extension)
|
||||
* to CharInfo objects populated with entities defined in
|
||||
* corresponding property file.
|
||||
*/
|
||||
/** Table of user-specified char infos. */
|
||||
private static HashMap m_getCharInfoCache = new HashMap();
|
||||
|
||||
/**
|
||||
@ -604,8 +576,7 @@ final class CharInfo
|
||||
* the creation of the set.
|
||||
*/
|
||||
private final void set(int i) {
|
||||
setASCIItextDirty(i);
|
||||
setASCIIattrDirty(i);
|
||||
setASCIIdirty(i);
|
||||
|
||||
int j = (i >> SHIFT_PER_WORD); // this word is used
|
||||
int k = j + 1;
|
||||
@ -640,43 +611,24 @@ final class CharInfo
|
||||
return in_the_set;
|
||||
}
|
||||
|
||||
// record if there are any entities other than
|
||||
// quot, amp, lt, gt (probably user defined)
|
||||
/**
|
||||
* This method returns true if there are some non-standard mappings to
|
||||
* entities other than quot, amp, lt, gt, and its only purpose is for
|
||||
* performance.
|
||||
* @param charToMap The value of the character that is mapped to a String
|
||||
* @param outputString The String to which the character is mapped, usually
|
||||
* an entity reference such as "<".
|
||||
* @return true if the mapping is not one of:
|
||||
* <ul>
|
||||
* <li> '<' to "<"
|
||||
* <li> '>' to ">"
|
||||
* <li> '&' to "&"
|
||||
* <li> '"' to """
|
||||
* </ul>
|
||||
* @return true if the entity
|
||||
* @param code The value of the character that has an entity defined
|
||||
* for it.
|
||||
*/
|
||||
private boolean extraEntity(String outputString, int charToMap)
|
||||
private boolean extraEntity(int entityValue)
|
||||
{
|
||||
boolean extra = false;
|
||||
if (charToMap < ASCII_MAX)
|
||||
if (entityValue < 128)
|
||||
{
|
||||
switch (charToMap)
|
||||
switch (entityValue)
|
||||
{
|
||||
case '"' : // quot
|
||||
if (!outputString.equals("""))
|
||||
extra = true;
|
||||
break;
|
||||
case '&' : // amp
|
||||
if (!outputString.equals("&"))
|
||||
extra = true;
|
||||
break;
|
||||
case '<' : // lt
|
||||
if (!outputString.equals("<"))
|
||||
extra = true;
|
||||
break;
|
||||
case '>' : // gt
|
||||
if (!outputString.equals(">"))
|
||||
extra = true;
|
||||
case 34 : // quot
|
||||
case 38 : // amp
|
||||
case 60 : // lt
|
||||
case 62 : // gt
|
||||
break;
|
||||
default : // other entity in range 0 to 127
|
||||
extra = true;
|
||||
@ -686,61 +638,49 @@ final class CharInfo
|
||||
}
|
||||
|
||||
/**
|
||||
* If the character is in the ASCII range then
|
||||
* mark it as needing replacement with
|
||||
* a String on output if it occurs in a text node.
|
||||
* If the character is a printable ASCII character then
|
||||
* mark it as not clean and needing replacement with
|
||||
* a String on output.
|
||||
* @param ch
|
||||
*/
|
||||
private void setASCIItextDirty(int j)
|
||||
private void setASCIIdirty(int j)
|
||||
{
|
||||
if (0 <= j && j < ASCII_MAX)
|
||||
{
|
||||
shouldMapTextChar_ASCII[j] = true;
|
||||
isCleanTextASCII[j] = false;
|
||||
isSpecialTextASCII[j] = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* If the character is in the ASCII range then
|
||||
* mark it as needing replacement with
|
||||
* a String on output if it occurs in a attribute value.
|
||||
* If the character is a printable ASCII character then
|
||||
* mark it as clean and not needing replacement with
|
||||
* a String on output.
|
||||
* @param ch
|
||||
*/
|
||||
private void setASCIIattrDirty(int j)
|
||||
private void setASCIIclean(int j)
|
||||
{
|
||||
if (0 <= j && j < ASCII_MAX)
|
||||
{
|
||||
shouldMapAttrChar_ASCII[j] = true;
|
||||
isCleanTextASCII[j] = true;
|
||||
isSpecialTextASCII[j] = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Call this method to register a char to String mapping, for example
|
||||
* to map '<' to "<".
|
||||
* @param outputString The String to map to.
|
||||
* @param inputChar The char to map from.
|
||||
* @return true if the mapping is not one of:
|
||||
* <ul>
|
||||
* <li> '<' to "<"
|
||||
* <li> '>' to ">"
|
||||
* <li> '&' to "&"
|
||||
* <li> '"' to """
|
||||
* </ul>
|
||||
*/
|
||||
boolean defineChar2StringMapping(String outputString, char inputChar)
|
||||
private void defineChar2StringMapping(String outputString, char inputChar)
|
||||
{
|
||||
CharKey character = new CharKey(inputChar);
|
||||
m_charToString.put(character, outputString);
|
||||
set(inputChar); // mark the character has having a mapping to a String
|
||||
|
||||
boolean extraMapping = extraEntity(outputString, inputChar);
|
||||
return extraMapping;
|
||||
|
||||
set(inputChar);
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple class for fast lookup of char values, when used with
|
||||
* hashtables. You can set the char, then use it as a key.
|
||||
*
|
||||
* This class is a copy of the one in com.sun.org.apache.xml.internal.utils.
|
||||
* It exists to cut the serializer's dependency on that package.
|
||||
*
|
||||
* @xsl.usage internal
|
||||
*/
|
||||
private static class CharKey extends Object
|
||||
|
@ -58,7 +58,7 @@ public final class ToHTMLStream extends ToStream
|
||||
* Map that tells which XML characters should have special treatment, and it
|
||||
* provides character to entity name lookup.
|
||||
*/
|
||||
private final CharInfo m_htmlcharInfo =
|
||||
private static final CharInfo m_htmlcharInfo =
|
||||
// new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE);
|
||||
CharInfo.getCharInfo(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);
|
||||
|
||||
@ -1369,7 +1369,7 @@ public final class ToHTMLStream extends ToStream
|
||||
// System.out.println("ch: "+(int)ch);
|
||||
// System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
|
||||
// System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
|
||||
if (escapingNotNeeded(ch) && (!m_charInfo.shouldMapAttrChar(ch)))
|
||||
if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
|
||||
{
|
||||
cleanLength++;
|
||||
}
|
||||
|
@ -919,8 +919,7 @@ abstract public class ToStream extends SerializerBase
|
||||
{
|
||||
// This is the old/fast code here, but is this
|
||||
// correct for all encodings?
|
||||
if (ch >= CharInfo.S_SPACE || (CharInfo.S_LINEFEED == ch ||
|
||||
CharInfo.S_CARRIAGERETURN == ch || CharInfo.S_HORIZONAL_TAB == ch))
|
||||
if (ch >= 0x20 || (0x0A == ch || 0x0D == ch || 0x09 == ch))
|
||||
ret= true;
|
||||
else
|
||||
ret = false;
|
||||
@ -1029,7 +1028,7 @@ abstract public class ToStream extends SerializerBase
|
||||
*
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
int accumDefaultEntity(
|
||||
protected int accumDefaultEntity(
|
||||
java.io.Writer writer,
|
||||
char ch,
|
||||
int i,
|
||||
@ -1048,7 +1047,7 @@ abstract public class ToStream extends SerializerBase
|
||||
{
|
||||
// if this is text node character and a special one of those,
|
||||
// or if this is a character from attribute value and a special one of those
|
||||
if ((fromTextNode && m_charInfo.shouldMapTextChar(ch)) || (!fromTextNode && m_charInfo.shouldMapAttrChar(ch)))
|
||||
if ((fromTextNode && m_charInfo.isSpecialTextChar(ch)) || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch)))
|
||||
{
|
||||
String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
|
||||
|
||||
@ -1399,6 +1398,7 @@ abstract public class ToStream extends SerializerBase
|
||||
|
||||
if (m_cdataTagOpen)
|
||||
closeCDATA();
|
||||
// the check with _escaping is a bit of a hack for XSLTC
|
||||
|
||||
if (m_disableOutputEscapingStates.peekOrFalse() || (!m_escaping))
|
||||
{
|
||||
@ -1421,173 +1421,82 @@ abstract public class ToStream extends SerializerBase
|
||||
try
|
||||
{
|
||||
int i;
|
||||
char ch1;
|
||||
int startClean;
|
||||
|
||||
// skip any leading whitespace
|
||||
// don't go off the end and use a hand inlined version
|
||||
// of isWhitespace(ch)
|
||||
final int end = start + length;
|
||||
int lastDirtyCharProcessed = start - 1; // last non-clean character that was processed
|
||||
// that was processed
|
||||
final Writer writer = m_writer;
|
||||
boolean isAllWhitespace = true;
|
||||
|
||||
// process any leading whitespace
|
||||
i = start;
|
||||
while (i < end && isAllWhitespace) {
|
||||
char ch1 = chars[i];
|
||||
|
||||
if (m_charInfo.shouldMapTextChar(ch1)) {
|
||||
// The character is supposed to be replaced by a String
|
||||
// so write out the clean whitespace characters accumulated
|
||||
// so far
|
||||
// then the String.
|
||||
writeOutCleanChars(chars, i, lastDirtyCharProcessed);
|
||||
String outputStringForChar = m_charInfo
|
||||
.getOutputStringForChar(ch1);
|
||||
writer.write(outputStringForChar);
|
||||
// We can't say that everything we are writing out is
|
||||
// all whitespace, we just wrote out a String.
|
||||
isAllWhitespace = false;
|
||||
lastDirtyCharProcessed = i; // mark the last non-clean
|
||||
// character processed
|
||||
i++;
|
||||
} else {
|
||||
// The character is clean, but is it a whitespace ?
|
||||
switch (ch1) {
|
||||
// TODO: Any other whitespace to consider?
|
||||
case CharInfo.S_SPACE:
|
||||
// Just accumulate the clean whitespace
|
||||
i++;
|
||||
break;
|
||||
case CharInfo.S_LINEFEED:
|
||||
lastDirtyCharProcessed = processLineFeed(chars, i,
|
||||
lastDirtyCharProcessed, writer);
|
||||
i++;
|
||||
break;
|
||||
case CharInfo.S_CARRIAGERETURN:
|
||||
writeOutCleanChars(chars, i, lastDirtyCharProcessed);
|
||||
writer.write(" ");
|
||||
lastDirtyCharProcessed = i;
|
||||
i++;
|
||||
break;
|
||||
case CharInfo.S_HORIZONAL_TAB:
|
||||
// Just accumulate the clean whitespace
|
||||
i++;
|
||||
break;
|
||||
default:
|
||||
// The character was clean, but not a whitespace
|
||||
// so break the loop to continue with this character
|
||||
// (we don't increment index i !!)
|
||||
isAllWhitespace = false;
|
||||
break;
|
||||
}
|
||||
int lastDirty = start - 1; // last character that needed processing
|
||||
for (i = start;
|
||||
((i < end)
|
||||
&& ((ch1 = chars[i]) == 0x20
|
||||
|| (ch1 == 0xA && m_lineSepUse)
|
||||
|| ch1 == 0xD
|
||||
|| ch1 == 0x09));
|
||||
i++)
|
||||
{
|
||||
/*
|
||||
* We are processing leading whitespace, but are doing the same
|
||||
* processing for dirty characters here as for non-whitespace.
|
||||
*
|
||||
*/
|
||||
if (!m_charInfo.isTextASCIIClean(ch1))
|
||||
{
|
||||
lastDirty = processDirty(chars,end, i,ch1, lastDirty, true);
|
||||
i = lastDirty;
|
||||
}
|
||||
}
|
||||
/* If there is some non-whitespace, mark that we may need
|
||||
* to preserve this. This is only important if we have indentation on.
|
||||
*/
|
||||
if (i < end || !isAllWhitespace)
|
||||
if (i < end)
|
||||
m_ispreserve = true;
|
||||
|
||||
|
||||
// int lengthClean; // number of clean characters in a row
|
||||
// final boolean[] isAsciiClean = m_charInfo.getASCIIClean();
|
||||
|
||||
final boolean isXML10 = XMLVERSION10.equals(getVersion());
|
||||
// we've skipped the leading whitespace, now deal with the rest
|
||||
for (; i < end; i++)
|
||||
{
|
||||
char ch = chars[i];
|
||||
|
||||
if (m_charInfo.shouldMapTextChar(ch)) {
|
||||
// The character is supposed to be replaced by a String
|
||||
// e.g. '&' --> "&"
|
||||
// e.g. '<' --> "<"
|
||||
writeOutCleanChars(chars, i, lastDirtyCharProcessed);
|
||||
String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
|
||||
writer.write(outputStringForChar);
|
||||
lastDirtyCharProcessed = i;
|
||||
}
|
||||
else {
|
||||
if (ch <= 0x1F) {
|
||||
// Range 0x00 through 0x1F inclusive
|
||||
//
|
||||
// This covers the non-whitespace control characters
|
||||
// in the range 0x1 to 0x1F inclusive.
|
||||
// It also covers the whitespace control characters in the same way:
|
||||
// 0x9 TAB
|
||||
// 0xA NEW LINE
|
||||
// 0xD CARRIAGE RETURN
|
||||
//
|
||||
// We also cover 0x0 ... It isn't valid
|
||||
// but we will output "�"
|
||||
|
||||
// The default will handle this just fine, but this
|
||||
// is a little performance boost to handle the more
|
||||
// common TAB, NEW-LINE, CARRIAGE-RETURN
|
||||
switch (ch) {
|
||||
|
||||
case CharInfo.S_HORIZONAL_TAB:
|
||||
// Leave whitespace TAB as a real character
|
||||
break;
|
||||
case CharInfo.S_LINEFEED:
|
||||
lastDirtyCharProcessed = processLineFeed(chars, i, lastDirtyCharProcessed, writer);
|
||||
break;
|
||||
case CharInfo.S_CARRIAGERETURN:
|
||||
writeOutCleanChars(chars, i, lastDirtyCharProcessed);
|
||||
writer.write(" ");
|
||||
lastDirtyCharProcessed = i;
|
||||
// Leave whitespace carriage return as a real character
|
||||
break;
|
||||
default:
|
||||
writeOutCleanChars(chars, i, lastDirtyCharProcessed);
|
||||
writer.write("&#");
|
||||
writer.write(Integer.toString(ch));
|
||||
writer.write(';');
|
||||
lastDirtyCharProcessed = i;
|
||||
{
|
||||
// A tight loop to skip over common clean chars
|
||||
// This tight loop makes it easier for the JIT
|
||||
// to optimize.
|
||||
char ch2;
|
||||
while (i<end
|
||||
&& ((ch2 = chars[i])<127)
|
||||
&& m_charInfo.isTextASCIIClean(ch2))
|
||||
i++;
|
||||
if (i == end)
|
||||
break;
|
||||
}
|
||||
|
||||
final char ch = chars[i];
|
||||
/* The check for isCharacterInC0orC1Range and
|
||||
* isNELorLSEPCharacter has been added
|
||||
* to support Control Characters in XML 1.1
|
||||
*/
|
||||
if (!isCharacterInC0orC1Range(ch) &&
|
||||
(isXML10 || !isNELorLSEPCharacter(ch)) &&
|
||||
(escapingNotNeeded(ch) && (!m_charInfo.isSpecialTextChar(ch)))
|
||||
|| ('"' == ch))
|
||||
{
|
||||
; // a character needing no special processing
|
||||
}
|
||||
}
|
||||
else if (ch < 0x7F) {
|
||||
// Range 0x20 through 0x7E inclusive
|
||||
// Normal ASCII chars, do nothing, just add it to
|
||||
// the clean characters
|
||||
|
||||
}
|
||||
else if (ch <= 0x9F){
|
||||
// Range 0x7F through 0x9F inclusive
|
||||
// More control characters, including NEL (0x85)
|
||||
writeOutCleanChars(chars, i, lastDirtyCharProcessed);
|
||||
writer.write("&#");
|
||||
writer.write(Integer.toString(ch));
|
||||
writer.write(';');
|
||||
lastDirtyCharProcessed = i;
|
||||
}
|
||||
else if (ch == CharInfo.S_LINE_SEPARATOR) {
|
||||
// LINE SEPARATOR
|
||||
writeOutCleanChars(chars, i, lastDirtyCharProcessed);
|
||||
writer.write("
");
|
||||
lastDirtyCharProcessed = i;
|
||||
}
|
||||
else if (m_encodingInfo.isInEncoding(ch)) {
|
||||
// If the character is in the encoding, and
|
||||
// not in the normal ASCII range, we also
|
||||
// just leave it get added on to the clean characters
|
||||
|
||||
}
|
||||
else {
|
||||
// This is a fallback plan, we should never get here
|
||||
// but if the character wasn't previously handled
|
||||
// (i.e. isn't in the encoding, etc.) then what
|
||||
// should we do? We choose to write out an entity
|
||||
writeOutCleanChars(chars, i, lastDirtyCharProcessed);
|
||||
writer.write("&#");
|
||||
writer.write(Integer.toString(ch));
|
||||
writer.write(';');
|
||||
lastDirtyCharProcessed = i;
|
||||
}
|
||||
else
|
||||
{
|
||||
lastDirty = processDirty(chars,end, i, ch, lastDirty, true);
|
||||
i = lastDirty;
|
||||
}
|
||||
}
|
||||
|
||||
// we've reached the end. Any clean characters at the
|
||||
// end of the array that need to be written out?
|
||||
startClean = lastDirtyCharProcessed + 1;
|
||||
startClean = lastDirty + 1;
|
||||
if (i > startClean)
|
||||
{
|
||||
int lengthClean = i - startClean;
|
||||
@ -1606,32 +1515,6 @@ abstract public class ToStream extends SerializerBase
|
||||
if (m_tracer != null)
|
||||
super.fireCharEvent(chars, start, length);
|
||||
}
|
||||
|
||||
private int processLineFeed(final char[] chars, int i, int lastProcessed, final Writer writer) throws IOException {
|
||||
if (!m_lineSepUse
|
||||
|| (m_lineSepLen ==1 && m_lineSep[0] == CharInfo.S_LINEFEED)){
|
||||
// We are leaving the new-line alone, and it is just
|
||||
// being added to the 'clean' characters,
|
||||
// so the last dirty character processed remains unchanged
|
||||
}
|
||||
else {
|
||||
writeOutCleanChars(chars, i, lastProcessed);
|
||||
writer.write(m_lineSep, 0, m_lineSepLen);
|
||||
lastProcessed = i;
|
||||
}
|
||||
return lastProcessed;
|
||||
}
|
||||
|
||||
private void writeOutCleanChars(final char[] chars, int i, int lastProcessed) throws IOException {
|
||||
int startClean;
|
||||
startClean = lastProcessed + 1;
|
||||
if (startClean < i)
|
||||
{
|
||||
int lengthClean = i - startClean;
|
||||
m_writer.write(chars, startClean, lengthClean);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method checks if a given character is between C0 or C1 range
|
||||
* of Control characters.
|
||||
@ -1751,7 +1634,7 @@ abstract public class ToStream extends SerializerBase
|
||||
*
|
||||
* @throws org.xml.sax.SAXException
|
||||
*/
|
||||
private int accumDefaultEscape(
|
||||
protected int accumDefaultEscape(
|
||||
Writer writer,
|
||||
char ch,
|
||||
int i,
|
||||
@ -1815,15 +1698,16 @@ abstract public class ToStream extends SerializerBase
|
||||
* to write it out as Numeric Character Reference(NCR) regardless of XML Version
|
||||
* being used for output document.
|
||||
*/
|
||||
if (isCharacterInC0orC1Range(ch) || isNELorLSEPCharacter(ch))
|
||||
if (isCharacterInC0orC1Range(ch) ||
|
||||
(XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch)))
|
||||
{
|
||||
writer.write("&#");
|
||||
writer.write(Integer.toString(ch));
|
||||
writer.write(';');
|
||||
}
|
||||
else if ((!escapingNotNeeded(ch) ||
|
||||
( (fromTextNode && m_charInfo.shouldMapTextChar(ch))
|
||||
|| (!fromTextNode && m_charInfo.shouldMapAttrChar(ch))))
|
||||
( (fromTextNode && m_charInfo.isSpecialTextChar(ch))
|
||||
|| (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
|
||||
&& m_elemContext.m_currentElemDepth > 0)
|
||||
{
|
||||
writer.write("&#");
|
||||
@ -2090,83 +1974,25 @@ abstract public class ToStream extends SerializerBase
|
||||
for (int i = 0; i < len; )
|
||||
{
|
||||
char ch = stringChars[i];
|
||||
if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
|
||||
{
|
||||
writer.write(ch);
|
||||
i++;
|
||||
}
|
||||
else
|
||||
{ // I guess the parser doesn't normalize cr/lf in attributes. -sb
|
||||
// if ((CharInfo.S_CARRIAGERETURN == ch)
|
||||
// && ((i + 1) < len)
|
||||
// && (CharInfo.S_LINEFEED == stringChars[i + 1]))
|
||||
// {
|
||||
// i++;
|
||||
// ch = CharInfo.S_LINEFEED;
|
||||
// }
|
||||
|
||||
if (m_charInfo.shouldMapAttrChar(ch) || !(escapingNotNeeded(ch))) {
|
||||
// The character is supposed to be replaced by a String
|
||||
// e.g. '&' --> "&amp;"
|
||||
// e.g. '<' --> "&lt;"
|
||||
i = accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
|
||||
}
|
||||
else {
|
||||
i++;
|
||||
if (0x0 <= ch && ch <= 0x1F) {
|
||||
// Range 0x00 through 0x1F inclusive
|
||||
// This covers the non-whitespace control characters
|
||||
// in the range 0x1 to 0x1F inclusive.
|
||||
// It also covers the whitespace control characters in the same way:
|
||||
// 0x9 TAB
|
||||
// 0xA NEW LINE
|
||||
// 0xD CARRIAGE RETURN
|
||||
//
|
||||
// We also cover 0x0 ... It isn't valid
|
||||
// but we will output "&#0;"
|
||||
|
||||
// The default will handle this just fine, but this
|
||||
// is a little performance boost to handle the more
|
||||
// common TAB, NEW-LINE, CARRIAGE-RETURN
|
||||
switch (ch) {
|
||||
|
||||
case CharInfo.S_HORIZONAL_TAB:
|
||||
writer.write("	");
|
||||
break;
|
||||
case CharInfo.S_LINEFEED:
|
||||
writer.write(" ");
|
||||
break;
|
||||
case CharInfo.S_CARRIAGERETURN:
|
||||
writer.write(" ");
|
||||
break;
|
||||
default:
|
||||
writer.write("&#");
|
||||
writer.write(Integer.toString(ch));
|
||||
writer.write(';');
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
else if (ch < 0x7F) {
|
||||
// Range 0x20 through 0x7E inclusive
|
||||
// Normal ASCII chars
|
||||
writer.write(ch);
|
||||
}
|
||||
else if (ch <= 0x9F){
|
||||
// Range 0x7F through 0x9F inclusive
|
||||
// More control characters
|
||||
writer.write("&#");
|
||||
writer.write(Integer.toString(ch));
|
||||
writer.write(';');
|
||||
}
|
||||
else if (ch == CharInfo.S_LINE_SEPARATOR) {
|
||||
// LINE SEPARATOR
|
||||
writer.write("
");
|
||||
}
|
||||
else if (m_encodingInfo.isInEncoding(ch)) {
|
||||
// If the character is in the encoding, and
|
||||
// not in the normal ASCII range, we also
|
||||
// just write it out
|
||||
writer.write(ch);
|
||||
}
|
||||
else {
|
||||
// This is a fallback plan, we should never get here
|
||||
// but if the character wasn't previously handled
|
||||
// (i.e. isn't in the encoding, etc.) then what
|
||||
// should we do? We choose to write out a character ref
|
||||
writer.write("&#");
|
||||
writer.write(Integer.toString(ch));
|
||||
writer.write(';');
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2936,14 +2762,6 @@ abstract public class ToStream extends SerializerBase
|
||||
closeCDATA();
|
||||
m_cdataTagOpen = false;
|
||||
}
|
||||
if (m_writer != null) {
|
||||
try {
|
||||
m_writer.flush();
|
||||
}
|
||||
catch(IOException e) {
|
||||
// what? me worry?
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void setContentHandler(ContentHandler ch)
|
||||
|
@ -56,7 +56,7 @@ public final class ToXMLStream extends ToStream
|
||||
* Map that tells which XML characters should have special treatment, and it
|
||||
* provides character to entity name lookup.
|
||||
*/
|
||||
private CharInfo m_xmlcharInfo =
|
||||
private static CharInfo m_xmlcharInfo =
|
||||
// new CharInfo(CharInfo.XML_ENTITIES_RESOURCE);
|
||||
CharInfo.getCharInfo(CharInfo.XML_ENTITIES_RESOURCE, Method.XML);
|
||||
|
||||
@ -329,11 +329,12 @@ public final class ToXMLStream extends ToStream
|
||||
|
||||
/**
|
||||
* Before Xalan 1497, a newline char was printed out if not inside of an
|
||||
* element. The whitespace is not significant if the output is standalone
|
||||
* element. The whitespace is not significant is the output is standalone
|
||||
*/
|
||||
if (m_elemContext.m_currentElemDepth <= 0 && m_isStandalone)
|
||||
writer.write(m_lineSep, 0, m_lineSepLen);
|
||||
|
||||
|
||||
/*
|
||||
* Don't write out any indentation whitespace now,
|
||||
* because there may be non-whitespace text after this.
|
||||
|
Loading…
Reference in New Issue
Block a user