From aca1037e9f29161c0a84a1e36a46a8854a4355a2 Mon Sep 17 00:00:00 2001 From: Joe Wang Date: Thu, 7 Jun 2012 13:47:53 -0700 Subject: [PATCH 1/2] 7151118: Regressions on 7u4 b11 comp. 7u4 b06 on specjvm2008.xml.transform subbenchmark Roll back XalanJ-2271 that caused the regression Reviewed-by: lancea --- .../xml/internal/serializer/CharInfo.java | 324 +++++++---------- .../xml/internal/serializer/ToHTMLStream.java | 4 +- .../xml/internal/serializer/ToStream.java | 334 ++++-------------- .../xml/internal/serializer/ToXMLStream.java | 5 +- 4 files changed, 213 insertions(+), 454 deletions(-) diff --git a/jaxp/src/com/sun/org/apache/xml/internal/serializer/CharInfo.java b/jaxp/src/com/sun/org/apache/xml/internal/serializer/CharInfo.java index 99c05b901bf..ab138efa477 100644 --- a/jaxp/src/com/sun/org/apache/xml/internal/serializer/CharInfo.java +++ b/jaxp/src/com/sun/org/apache/xml/internal/serializer/CharInfo.java @@ -55,7 +55,7 @@ import com.sun.org.apache.xalan.internal.utils.ObjectFactory; final class CharInfo { /** Given a character, lookup a String to output (e.g. a decorated entity reference). */ - private HashMap m_charToString; + private HashMap m_charToString = new HashMap(); /** * The name of the HTML entities file. @@ -72,50 +72,42 @@ final class CharInfo "com.sun.org.apache.xml.internal.serializer.XMLEntities"; /** The horizontal tab character, which the parser should always normalize. */ - static final char S_HORIZONAL_TAB = 0x09; + public static final char S_HORIZONAL_TAB = 0x09; /** The linefeed character, which the parser should always normalize. */ - static final char S_LINEFEED = 0x0A; + public static final char S_LINEFEED = 0x0A; /** The carriage return character, which the parser should always normalize. */ - static final char S_CARRIAGERETURN = 0x0D; - static final char S_SPACE = 0x20; - static final char S_QUOTE = 0x22; - static final char S_LT = 0x3C; - static final char S_GT = 0x3E; - static final char S_NEL = 0x85; - static final char S_LINE_SEPARATOR = 0x2028; + public static final char S_CARRIAGERETURN = 0x0D; /** This flag is an optimization for HTML entities. It false if entities * other than quot (34), amp (38), lt (60) and gt (62) are defined * in the range 0 to 127. * @xsl.usage internal */ - boolean onlyQuotAmpLtGt; + final boolean onlyQuotAmpLtGt; /** Copy the first 0,1 ... ASCII_MAX values into an array */ - static final int ASCII_MAX = 128; + private static final int ASCII_MAX = 128; /** Array of values is faster access than a set of bits - * to quickly check ASCII characters in attribute values, - * the value is true if the character in an attribute value - * should be mapped to a String. + * to quickly check ASCII characters in attribute values. */ - private final boolean[] shouldMapAttrChar_ASCII; + private boolean[] isSpecialAttrASCII = new boolean[ASCII_MAX]; /** Array of values is faster access than a set of bits - * to quickly check ASCII characters in text nodes, - * the value is true if the character in a text node - * should be mapped to a String. + * to quickly check ASCII characters in text nodes. */ - private final boolean[] shouldMapTextChar_ASCII; + private boolean[] isSpecialTextASCII = new boolean[ASCII_MAX]; + + private boolean[] isCleanTextASCII = new boolean[ASCII_MAX]; /** An array of bits to record if the character is in the set. * Although information in this array is complete, the * isSpecialAttrASCII array is used first because access to its values * is common and faster. */ - private final int array_of_bits[]; + private int array_of_bits[] = createEmptySetOfIntegers(65535); // 5 for 32 bit words, 6 for 64 bit words ... @@ -146,38 +138,33 @@ final class CharInfo /** - * A base constructor just to explicitly create the fields, - * with the exception of m_charToString which is handled - * by the constructor that delegates base construction to this one. - *

- * m_charToString is not created here only for performance reasons, - * to avoid creating a Hashtable that will be replaced when - * making a mutable copy, {@link #mutableCopyOf(CharInfo)}. + * Constructor that reads in a resource file that describes the mapping of + * characters to entity references. + * This constructor is private, just to force the use + * of the getCharInfo(entitiesResource) factory * + * Resource files must be encoded in UTF-8 and can either be properties + * files with a .properties extension assumed. Alternatively, they can + * have the following form, with no particular extension assumed: + * + *

+     * # First char # is a comment
+     * Entity numericValue
+     * quot 34
+     * amp 38
+     * 
+ * + * @param entitiesResource Name of properties or resource file that should + * be loaded, which describes that mapping of characters to entity + * references. */ - private CharInfo() + private CharInfo(String entitiesResource, String method) { - this.array_of_bits = createEmptySetOfIntegers(65535); - this.firstWordNotUsed = 0; - this.shouldMapAttrChar_ASCII = new boolean[ASCII_MAX]; - this.shouldMapTextChar_ASCII = new boolean[ASCII_MAX]; - this.m_charKey = new CharKey(); - - // Not set here, but in a constructor that uses this one - // this.m_charToString = new Hashtable(); - - this.onlyQuotAmpLtGt = true; - - - return; + this(entitiesResource, method, false); } private CharInfo(String entitiesResource, String method, boolean internal) { - // call the default constructor to create the fields - this(); - m_charToString = new HashMap(); - ResourceBundle entities = null; boolean noExtraEntities = true; @@ -203,10 +190,12 @@ final class CharInfo String name = (String) keys.nextElement(); String value = entities.getString(name); int code = Integer.parseInt(value); - boolean extra = defineEntity(name, (char) code); - if (extra) + defineEntity(name, (char) code); + if (extraEntity(code)) noExtraEntities = false; } + set(S_LINEFEED); + set(S_CARRIAGERETURN); } else { InputStream is = null; @@ -290,8 +279,8 @@ final class CharInfo int code = Integer.parseInt(value); - boolean extra = defineEntity(name, (char) code); - if (extra) + defineEntity(name, (char) code); + if (extraEntity(code)) noExtraEntities = false; } } @@ -300,6 +289,8 @@ final class CharInfo } is.close(); + set(S_LINEFEED); + set(S_CARRIAGERETURN); } catch (Exception e) { throw new RuntimeException( Utils.messages.createMessage( @@ -317,8 +308,31 @@ final class CharInfo } } + /* initialize the array isCleanTextASCII[] with a cache of values + * for use by ToStream.character(char[], int , int) + * and the array isSpecialTextASCII[] with the opposite values + * (all in the name of performance!) + */ + for (int ch = 0; ch Unlike internal entities, character references are a string to single * character mapping. They are used to map non-ASCII characters both on - * parsing and printing, primarily for HTML documents. '&lt;' is an + * parsing and printing, primarily for HTML documents. '<amp;' is an * example of a character reference.

* * @param name The entity's name * @param value The entity's value - * @return true if the mapping is not one of: - * */ - private boolean defineEntity(String name, char value) + private void defineEntity(String name, char value) { StringBuilder sb = new StringBuilder("&"); sb.append(name); sb.append(';'); String entityString = sb.toString(); - boolean extra = defineChar2StringMapping(entityString, value); - return extra; + defineChar2StringMapping(entityString, value); } - /** - * A utility object, just used to map characters to output Strings, - * needed because a HashMap needs to map an object as a key, not a - * Java primitive type, like a char, so this object gets around that - * and it is reusable. - */ - private final CharKey m_charKey; - /** * Map a character to a String. For example given * the character '>' this method would return the fully decorated @@ -413,21 +400,21 @@ final class CharInfo /** * Tell if the character argument that is from - * an attribute value has a mapping to a String. + * an attribute value should have special treatment. * * @param value the value of a character that is in an attribute value * @return true if the character should have any special treatment, * such as when writing out attribute values, - * such as when writing out entity references. + * or entity references. * @xsl.usage internal */ - final boolean shouldMapAttrChar(int value) + final boolean isSpecialAttrChar(int value) { // for performance try the values in the boolean array first, // this is faster access than the BitSet for common ASCII values if (value < ASCII_MAX) - return shouldMapAttrChar_ASCII[value]; + return isSpecialAttrASCII[value]; // rather than java.util.BitSet, our private // implementation is faster (and less general). @@ -436,27 +423,48 @@ final class CharInfo /** * Tell if the character argument that is from a - * text node has a mapping to a String, for example - * to map '<' to "<". + * text node should have special treatment. * * @param value the value of a character that is in a text node - * @return true if the character has a mapping to a String, - * such as when writing out entity references. + * @return true if the character should have any special treatment, + * such as when writing out attribute values, + * or entity references. * @xsl.usage internal */ - final boolean shouldMapTextChar(int value) + final boolean isSpecialTextChar(int value) { // for performance try the values in the boolean array first, // this is faster access than the BitSet for common ASCII values if (value < ASCII_MAX) - return shouldMapTextChar_ASCII[value]; + return isSpecialTextASCII[value]; // rather than java.util.BitSet, our private // implementation is faster (and less general). return get(value); } + /** + * This method is used to determine if an ASCII character in + * a text node (not an attribute value) is "clean". + * @param value the character to check (0 to 127). + * @return true if the character can go to the writer as-is + * @xsl.usage internal + */ + final boolean isTextASCIIClean(int value) + { + return isCleanTextASCII[value]; + } + +// In the future one might want to use the array directly and avoid +// the method call, but I think the JIT alreay inlines this well enough +// so don't do it (for now) - bjm +// public final boolean[] getASCIIClean() +// { +// return isCleanTextASCII; +// } + + private static CharInfo getCharInfoBasedOnPrivilege( final String entitiesFileName, final String method, final boolean internal){ @@ -491,17 +499,15 @@ final class CharInfo { CharInfo charInfo = (CharInfo) m_getCharInfoCache.get(entitiesFileName); if (charInfo != null) { - return mutableCopyOf(charInfo); + return charInfo; } // try to load it internally - cache try { charInfo = getCharInfoBasedOnPrivilege(entitiesFileName, method, true); - // Put the common copy of charInfo in the cache, but return - // a copy of it. m_getCharInfoCache.put(entitiesFileName, charInfo); - return mutableCopyOf(charInfo); + return charInfo; } catch (Exception e) {} // try to load it externally - do not cache @@ -528,41 +534,7 @@ final class CharInfo method, false); } - /** - * Create a mutable copy of the cached one. - * @param charInfo The cached one. - * @return - */ - private static CharInfo mutableCopyOf(CharInfo charInfo) { - CharInfo copy = new CharInfo(); - - int max = charInfo.array_of_bits.length; - System.arraycopy(charInfo.array_of_bits,0,copy.array_of_bits,0,max); - - copy.firstWordNotUsed = charInfo.firstWordNotUsed; - - max = charInfo.shouldMapAttrChar_ASCII.length; - System.arraycopy(charInfo.shouldMapAttrChar_ASCII,0,copy.shouldMapAttrChar_ASCII,0,max); - - max = charInfo.shouldMapTextChar_ASCII.length; - System.arraycopy(charInfo.shouldMapTextChar_ASCII,0,copy.shouldMapTextChar_ASCII,0,max); - - // utility field copy.m_charKey is already created in the default constructor - - copy.m_charToString = (HashMap) charInfo.m_charToString.clone(); - - copy.onlyQuotAmpLtGt = charInfo.onlyQuotAmpLtGt; - - return copy; - } - - /** - * Table of user-specified char infos. - * The table maps entify file names (the name of the - * property file without the .properties extension) - * to CharInfo objects populated with entities defined in - * corresponding property file. - */ + /** Table of user-specified char infos. */ private static HashMap m_getCharInfoCache = new HashMap(); /** @@ -604,8 +576,7 @@ final class CharInfo * the creation of the set. */ private final void set(int i) { - setASCIItextDirty(i); - setASCIIattrDirty(i); + setASCIIdirty(i); int j = (i >> SHIFT_PER_WORD); // this word is used int k = j + 1; @@ -640,43 +611,24 @@ final class CharInfo return in_the_set; } + // record if there are any entities other than + // quot, amp, lt, gt (probably user defined) /** - * This method returns true if there are some non-standard mappings to - * entities other than quot, amp, lt, gt, and its only purpose is for - * performance. - * @param charToMap The value of the character that is mapped to a String - * @param outputString The String to which the character is mapped, usually - * an entity reference such as "<". - * @return true if the mapping is not one of: - * + * @return true if the entity + * @param code The value of the character that has an entity defined + * for it. */ - private boolean extraEntity(String outputString, int charToMap) + private boolean extraEntity(int entityValue) { boolean extra = false; - if (charToMap < ASCII_MAX) + if (entityValue < 128) { - switch (charToMap) + switch (entityValue) { - case '"' : // quot - if (!outputString.equals(""")) - extra = true; - break; - case '&' : // amp - if (!outputString.equals("&")) - extra = true; - break; - case '<' : // lt - if (!outputString.equals("<")) - extra = true; - break; - case '>' : // gt - if (!outputString.equals(">")) - extra = true; + case 34 : // quot + case 38 : // amp + case 60 : // lt + case 62 : // gt break; default : // other entity in range 0 to 127 extra = true; @@ -686,61 +638,49 @@ final class CharInfo } /** - * If the character is in the ASCII range then - * mark it as needing replacement with - * a String on output if it occurs in a text node. + * If the character is a printable ASCII character then + * mark it as not clean and needing replacement with + * a String on output. * @param ch */ - private void setASCIItextDirty(int j) + private void setASCIIdirty(int j) { if (0 <= j && j < ASCII_MAX) { - shouldMapTextChar_ASCII[j] = true; + isCleanTextASCII[j] = false; + isSpecialTextASCII[j] = true; } } /** - * If the character is in the ASCII range then - * mark it as needing replacement with - * a String on output if it occurs in a attribute value. + * If the character is a printable ASCII character then + * mark it as and not needing replacement with + * a String on output. * @param ch */ - private void setASCIIattrDirty(int j) + private void setASCIIclean(int j) { if (0 <= j && j < ASCII_MAX) { - shouldMapAttrChar_ASCII[j] = true; + isCleanTextASCII[j] = true; + isSpecialTextASCII[j] = false; } } - /** - * Call this method to register a char to String mapping, for example - * to map '<' to "<". - * @param outputString The String to map to. - * @param inputChar The char to map from. - * @return true if the mapping is not one of: - * - */ - boolean defineChar2StringMapping(String outputString, char inputChar) + private void defineChar2StringMapping(String outputString, char inputChar) { CharKey character = new CharKey(inputChar); m_charToString.put(character, outputString); - set(inputChar); // mark the character has having a mapping to a String - - boolean extraMapping = extraEntity(outputString, inputChar); - return extraMapping; - + set(inputChar); } /** * Simple class for fast lookup of char values, when used with * hashtables. You can set the char, then use it as a key. * + * This class is a copy of the one in com.sun.org.apache.xml.internal.utils. + * It exists to cut the serializers dependancy on that package. + * * @xsl.usage internal */ private static class CharKey extends Object diff --git a/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToHTMLStream.java b/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToHTMLStream.java index 94fbce6d64e..aa48eaed8b4 100644 --- a/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToHTMLStream.java +++ b/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToHTMLStream.java @@ -58,7 +58,7 @@ public final class ToHTMLStream extends ToStream * Map that tells which XML characters should have special treatment, and it * provides character to entity name lookup. */ - private final CharInfo m_htmlcharInfo = + private static final CharInfo m_htmlcharInfo = // new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE); CharInfo.getCharInfo(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML); @@ -1369,7 +1369,7 @@ public final class ToHTMLStream extends ToStream // System.out.println("ch: "+(int)ch); // System.out.println("m_maxCharacter: "+(int)m_maxCharacter); // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]); - if (escapingNotNeeded(ch) && (!m_charInfo.shouldMapAttrChar(ch))) + if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch))) { cleanLength++; } diff --git a/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToStream.java b/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToStream.java index cf07a695c83..8c4c2ec8892 100644 --- a/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToStream.java +++ b/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToStream.java @@ -919,8 +919,7 @@ abstract public class ToStream extends SerializerBase { // This is the old/fast code here, but is this // correct for all encodings? - if (ch >= CharInfo.S_SPACE || (CharInfo.S_LINEFEED == ch || - CharInfo.S_CARRIAGERETURN == ch || CharInfo.S_HORIZONAL_TAB == ch)) + if (ch >= 0x20 || (0x0A == ch || 0x0D == ch || 0x09 == ch)) ret= true; else ret = false; @@ -1029,7 +1028,7 @@ abstract public class ToStream extends SerializerBase * * @throws java.io.IOException */ - int accumDefaultEntity( + protected int accumDefaultEntity( java.io.Writer writer, char ch, int i, @@ -1048,7 +1047,7 @@ abstract public class ToStream extends SerializerBase { // if this is text node character and a special one of those, // or if this is a character from attribute value and a special one of those - if ((fromTextNode && m_charInfo.shouldMapTextChar(ch)) || (!fromTextNode && m_charInfo.shouldMapAttrChar(ch))) + if ((fromTextNode && m_charInfo.isSpecialTextChar(ch)) || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))) { String outputStringForChar = m_charInfo.getOutputStringForChar(ch); @@ -1399,6 +1398,7 @@ abstract public class ToStream extends SerializerBase if (m_cdataTagOpen) closeCDATA(); + // the check with _escaping is a bit of a hack for XLSTC if (m_disableOutputEscapingStates.peekOrFalse() || (!m_escaping)) { @@ -1421,173 +1421,82 @@ abstract public class ToStream extends SerializerBase try { int i; + char ch1; int startClean; // skip any leading whitspace // don't go off the end and use a hand inlined version // of isWhitespace(ch) final int end = start + length; - int lastDirtyCharProcessed = start - 1; // last non-clean character that was processed - // that was processed - final Writer writer = m_writer; - boolean isAllWhitespace = true; - - // process any leading whitspace - i = start; - while (i < end && isAllWhitespace) { - char ch1 = chars[i]; - - if (m_charInfo.shouldMapTextChar(ch1)) { - // The character is supposed to be replaced by a String - // so write out the clean whitespace characters accumulated - // so far - // then the String. - writeOutCleanChars(chars, i, lastDirtyCharProcessed); - String outputStringForChar = m_charInfo - .getOutputStringForChar(ch1); - writer.write(outputStringForChar); - // We can't say that everything we are writing out is - // all whitespace, we just wrote out a String. - isAllWhitespace = false; - lastDirtyCharProcessed = i; // mark the last non-clean - // character processed - i++; - } else { - // The character is clean, but is it a whitespace ? - switch (ch1) { - // TODO: Any other whitespace to consider? - case CharInfo.S_SPACE: - // Just accumulate the clean whitespace - i++; - break; - case CharInfo.S_LINEFEED: - lastDirtyCharProcessed = processLineFeed(chars, i, - lastDirtyCharProcessed, writer); - i++; - break; - case CharInfo.S_CARRIAGERETURN: - writeOutCleanChars(chars, i, lastDirtyCharProcessed); - writer.write(" "); - lastDirtyCharProcessed = i; - i++; - break; - case CharInfo.S_HORIZONAL_TAB: - // Just accumulate the clean whitespace - i++; - break; - default: - // The character was clean, but not a whitespace - // so break the loop to continue with this character - // (we don't increment index i !!) - isAllWhitespace = false; - break; + int lastDirty = start - 1; // last character that needed processing + for (i = start; + ((i < end) + && ((ch1 = chars[i]) == 0x20 + || (ch1 == 0xA && m_lineSepUse) + || ch1 == 0xD + || ch1 == 0x09)); + i++) + { + /* + * We are processing leading whitespace, but are doing the same + * processing for dirty characters here as for non-whitespace. + * + */ + if (!m_charInfo.isTextASCIIClean(ch1)) + { + lastDirty = processDirty(chars,end, i,ch1, lastDirty, true); + i = lastDirty; } } - } /* If there is some non-whitespace, mark that we may need * to preserve this. This is only important if we have indentation on. */ - if (i < end || !isAllWhitespace) + if (i < end) m_ispreserve = true; + +// int lengthClean; // number of clean characters in a row +// final boolean[] isAsciiClean = m_charInfo.getASCIIClean(); + + final boolean isXML10 = XMLVERSION10.equals(getVersion()); + // we've skipped the leading whitespace, now deal with the rest for (; i < end; i++) { - char ch = chars[i]; - - if (m_charInfo.shouldMapTextChar(ch)) { - // The character is supposed to be replaced by a String - // e.g. '&' --> "&" - // e.g. '<' --> "<" - writeOutCleanChars(chars, i, lastDirtyCharProcessed); - String outputStringForChar = m_charInfo.getOutputStringForChar(ch); - writer.write(outputStringForChar); - lastDirtyCharProcessed = i; - } - else { - if (ch <= 0x1F) { - // Range 0x00 through 0x1F inclusive - // - // This covers the non-whitespace control characters - // in the range 0x1 to 0x1F inclusive. - // It also covers the whitespace control characters in the same way: - // 0x9 TAB - // 0xA NEW LINE - // 0xD CARRIAGE RETURN - // - // We also cover 0x0 ... It isn't valid - // but we will output "�" - - // The default will handle this just fine, but this - // is a little performance boost to handle the more - // common TAB, NEW-LINE, CARRIAGE-RETURN - switch (ch) { - - case CharInfo.S_HORIZONAL_TAB: - // Leave whitespace TAB as a real character + { + // A tight loop to skip over common clean chars + // This tight loop makes it easier for the JIT + // to optimize. + char ch2; + while (i startClean) { int lengthClean = i - startClean; @@ -1606,32 +1515,6 @@ abstract public class ToStream extends SerializerBase if (m_tracer != null) super.fireCharEvent(chars, start, length); } - - private int processLineFeed(final char[] chars, int i, int lastProcessed, final Writer writer) throws IOException { - if (!m_lineSepUse - || (m_lineSepLen ==1 && m_lineSep[0] == CharInfo.S_LINEFEED)){ - // We are leaving the new-line alone, and it is just - // being added to the 'clean' characters, - // so the last dirty character processed remains unchanged - } - else { - writeOutCleanChars(chars, i, lastProcessed); - writer.write(m_lineSep, 0, m_lineSepLen); - lastProcessed = i; - } - return lastProcessed; - } - - private void writeOutCleanChars(final char[] chars, int i, int lastProcessed) throws IOException { - int startClean; - startClean = lastProcessed + 1; - if (startClean < i) - { - int lengthClean = i - startClean; - m_writer.write(chars, startClean, lengthClean); - } - } - /** * This method checks if a given character is between C0 or C1 range * of Control characters. @@ -1751,7 +1634,7 @@ abstract public class ToStream extends SerializerBase * * @throws org.xml.sax.SAXException */ - private int accumDefaultEscape( + protected int accumDefaultEscape( Writer writer, char ch, int i, @@ -1815,15 +1698,16 @@ abstract public class ToStream extends SerializerBase * to write it out as Numeric Character Reference(NCR) regardless of XML Version * being used for output document. */ - if (isCharacterInC0orC1Range(ch) || isNELorLSEPCharacter(ch)) + if (isCharacterInC0orC1Range(ch) || + (XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch))) { writer.write("&#"); writer.write(Integer.toString(ch)); writer.write(';'); } else if ((!escapingNotNeeded(ch) || - ( (fromTextNode && m_charInfo.shouldMapTextChar(ch)) - || (!fromTextNode && m_charInfo.shouldMapAttrChar(ch)))) + ( (fromTextNode && m_charInfo.isSpecialTextChar(ch)) + || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch)))) && m_elemContext.m_currentElemDepth > 0) { writer.write("&#"); @@ -2087,86 +1971,28 @@ abstract public class ToStream extends SerializerBase string.getChars(0,len, m_attrBuff, 0); final char[] stringChars = m_attrBuff; - for (int i = 0; i < len;) + for (int i = 0; i < len; ) { char ch = stringChars[i]; + if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch))) + { + writer.write(ch); + i++; + } + else + { // I guess the parser doesn't normalize cr/lf in attributes. -sb +// if ((CharInfo.S_CARRIAGERETURN == ch) +// && ((i + 1) < len) +// && (CharInfo.S_LINEFEED == stringChars[i + 1])) +// { +// i++; +// ch = CharInfo.S_LINEFEED; +// } - if (m_charInfo.shouldMapAttrChar(ch) || !(escapingNotNeeded(ch))) { - // The character is supposed to be replaced by a String - // e.g. '&' --> "&" - // e.g. '<' --> "<" i = accumDefaultEscape(writer, ch, i, stringChars, len, false, true); } - else { - i++; - if (0x0 <= ch && ch <= 0x1F) { - // Range 0x00 through 0x1F inclusive - // This covers the non-whitespace control characters - // in the range 0x1 to 0x1F inclusive. - // It also covers the whitespace control characters in the same way: - // 0x9 TAB - // 0xA NEW LINE - // 0xD CARRIAGE RETURN - // - // We also cover 0x0 ... It isn't valid - // but we will output "�" - - // The default will handle this just fine, but this - // is a little performance boost to handle the more - // common TAB, NEW-LINE, CARRIAGE-RETURN - switch (ch) { - - case CharInfo.S_HORIZONAL_TAB: - writer.write(" "); - break; - case CharInfo.S_LINEFEED: - writer.write(" "); - break; - case CharInfo.S_CARRIAGERETURN: - writer.write(" "); - break; - default: - writer.write("&#"); - writer.write(Integer.toString(ch)); - writer.write(';'); - break; - } - } - else if (ch < 0x7F) { - // Range 0x20 through 0x7E inclusive - // Normal ASCII chars - writer.write(ch); - } - else if (ch <= 0x9F){ - // Range 0x7F through 0x9F inclusive - // More control characters - writer.write("&#"); - writer.write(Integer.toString(ch)); - writer.write(';'); - } - else if (ch == CharInfo.S_LINE_SEPARATOR) { - // LINE SEPARATOR - writer.write("
"); - } - else if (m_encodingInfo.isInEncoding(ch)) { - // If the character is in the encoding, and - // not in the normal ASCII range, we also - // just write it out - writer.write(ch); - } - else { - // This is a fallback plan, we should never get here - // but if the character wasn't previously handled - // (i.e. isn't in the encoding, etc.) then what - // should we do? We choose to write out a character ref - writer.write("&#"); - writer.write(Integer.toString(ch)); - writer.write(';'); - } - } - } } /** @@ -2936,14 +2762,6 @@ abstract public class ToStream extends SerializerBase closeCDATA(); m_cdataTagOpen = false; } - if (m_writer != null) { - try { - m_writer.flush(); - } - catch(IOException e) { - // what? me worry? - } - } } public void setContentHandler(ContentHandler ch) diff --git a/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToXMLStream.java b/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToXMLStream.java index 51e1331144a..e64cc3face8 100644 --- a/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToXMLStream.java +++ b/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToXMLStream.java @@ -56,7 +56,7 @@ public final class ToXMLStream extends ToStream * Map that tells which XML characters should have special treatment, and it * provides character to entity name lookup. */ - private CharInfo m_xmlcharInfo = + private static CharInfo m_xmlcharInfo = // new CharInfo(CharInfo.XML_ENTITIES_RESOURCE); CharInfo.getCharInfo(CharInfo.XML_ENTITIES_RESOURCE, Method.XML); @@ -329,11 +329,12 @@ public final class ToXMLStream extends ToStream /** * Before Xalan 1497, a newline char was printed out if not inside of an - * element. The whitespace is not significant if the output is standalone + * element. The whitespace is not significant is the output is standalone */ if (m_elemContext.m_currentElemDepth <= 0 && m_isStandalone) writer.write(m_lineSep, 0, m_lineSepLen); + /* * Don't write out any indentation whitespace now, * because there may be non-whitespace text after this. From 478e52def477c6272bf2207cb24d70d9c3cabd5c Mon Sep 17 00:00:00 2001 From: Joe Wang Date: Fri, 8 Jun 2012 11:28:29 -0700 Subject: [PATCH 2/2] 7157608: One feature is not recognized Adding feature standard-uri-conformant into the recognized list Reviewed-by: psandoz --- .../sun/org/apache/xerces/internal/impl/XMLEntityManager.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java b/jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java index c1ddeb6cb59..7f3295d1479 100644 --- a/jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java +++ b/jaxp/src/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java @@ -182,7 +182,8 @@ protected static final String PARSER_SETTINGS = EXTERNAL_GENERAL_ENTITIES, EXTERNAL_PARAMETER_ENTITIES, ALLOW_JAVA_ENCODINGS, - WARN_ON_DUPLICATE_ENTITYDEF + WARN_ON_DUPLICATE_ENTITYDEF, + STANDARD_URI_CONFORMANT }; /** Feature defaults. */ @@ -192,6 +193,7 @@ protected static final String PARSER_SETTINGS = Boolean.TRUE, Boolean.TRUE, Boolean.FALSE, + Boolean.FALSE }; /** Recognized properties. */