7151118: Regressions on 7u4 b11 compared to 7u4 b06 on the specjvm2008.xml.transform sub-benchmark

Roll back XalanJ-2271, which caused the regression.
Reviewed-by: lancea
2012-06-07 13:47:53 -07:00 · 2012-06-07 13:47:53 -07:00 · aca1037e9f
commit aca1037e9f
parent 6687eed677
4 changed files with 213 additions and 454 deletions
--- a/jaxp/src/com/sun/org/apache/xml/internal/serializer/CharInfo.java
+++ b/jaxp/src/com/sun/org/apache/xml/internal/serializer/CharInfo.java
@ -55,7 +55,7 @@ import com.sun.org.apache.xalan.internal.utils.ObjectFactory;
 final class CharInfo
 {
    /** Given a character, lookup a String to output (e.g. a decorated entity reference). */
-    private HashMap m_charToString;
+    private HashMap m_charToString = new HashMap();

    /**
     * The name of the HTML entities file.
@ -72,50 +72,42 @@ final class CharInfo
                "com.sun.org.apache.xml.internal.serializer.XMLEntities";

    /** The horizontal tab character, which the parser should always normalize. */
-    static final char S_HORIZONAL_TAB = 0x09;
+    public static final char S_HORIZONAL_TAB = 0x09;

    /** The linefeed character, which the parser should always normalize. */
-    static final char S_LINEFEED = 0x0A;
+    public static final char S_LINEFEED = 0x0A;

    /** The carriage return character, which the parser should always normalize. */
-    static final char S_CARRIAGERETURN = 0x0D;
-    static final char S_SPACE = 0x20;
-    static final char S_QUOTE = 0x22;
-    static final char S_LT = 0x3C;
-    static final char S_GT = 0x3E;
-    static final char S_NEL = 0x85;
-    static final char S_LINE_SEPARATOR = 0x2028;
+    public static final char S_CARRIAGERETURN = 0x0D;

    /** This flag is an optimization for HTML entities. It false if entities
     * other than quot (34), amp (38), lt (60) and gt (62) are defined
     * in the range 0 to 127.
     * @xsl.usage internal
     */
-    boolean onlyQuotAmpLtGt;
+    final boolean onlyQuotAmpLtGt;

    /** Copy the first 0,1 ... ASCII_MAX values into an array */
-    static final int ASCII_MAX = 128;
+    private static final int ASCII_MAX = 128;

    /** Array of values is faster access than a set of bits
-     * to quickly check ASCII characters in attribute values,
-     * the value is true if the character in an attribute value
-     * should be mapped to a String.
+     * to quickly check ASCII characters in attribute values.
     */
-    private final boolean[] shouldMapAttrChar_ASCII;
+    private boolean[] isSpecialAttrASCII = new boolean[ASCII_MAX];

    /** Array of values is faster access than a set of bits
-     * to quickly check ASCII characters in text nodes,
-     * the value is true if the character in a text node
-     * should be mapped to a String.
+     * to quickly check ASCII characters in text nodes.
     */
-    private final boolean[] shouldMapTextChar_ASCII;
+    private boolean[] isSpecialTextASCII = new boolean[ASCII_MAX];
+
+    private boolean[] isCleanTextASCII = new boolean[ASCII_MAX];

    /** An array of bits to record if the character is in the set.
     * Although information in this array is complete, the
     * isSpecialAttrASCII array is used first because access to its values
     * is common and faster.
     */
-    private final int array_of_bits[];
+    private int array_of_bits[] = createEmptySetOfIntegers(65535);


    // 5 for 32 bit words,  6 for 64 bit words ...
@ -146,38 +138,33 @@ final class CharInfo


    /**
-     * A base constructor just to explicitly create the fields,
-     * with the exception of m_charToString which is handled
-     * by the constructor that delegates base construction to this one.
-     * <p>
-     * m_charToString is not created here only for performance reasons,
-     * to avoid creating a Hashtable that will be replaced when
-     * making a mutable copy, {@link #mutableCopyOf(CharInfo)}.
+     * Constructor that reads in a resource file that describes the mapping of
+     * characters to entity references.
+     * This constructor is private, just to force the use
+     * of the getCharInfo(entitiesResource) factory
     *
+     * Resource files must be encoded in UTF-8 and can either be properties
+     * files with a .properties extension assumed.  Alternatively, they can
+     * have the following form, with no particular extension assumed:
+     *
+     * <pre>
+     * # First char # is a comment
+     * Entity numericValue
+     * quot 34
+     * amp 38
+     * </pre>
+     *
+     * @param entitiesResource Name of properties or resource file that should
+     * be loaded, which describes that mapping of characters to entity
+     * references.
     */
-    private CharInfo()
+    private CharInfo(String entitiesResource, String method)
    {
-        this.array_of_bits = createEmptySetOfIntegers(65535);
-        this.firstWordNotUsed = 0;
-        this.shouldMapAttrChar_ASCII = new boolean[ASCII_MAX];
-        this.shouldMapTextChar_ASCII = new boolean[ASCII_MAX];
-        this.m_charKey = new CharKey();
-
-        // Not set here, but in a constructor that uses this one
-        // this.m_charToString =  new Hashtable();
-
-        this.onlyQuotAmpLtGt = true;
-
-
-        return;
+        this(entitiesResource, method, false);
    }

    private CharInfo(String entitiesResource, String method, boolean internal)
    {
-        // call the default constructor to create the fields
-        this();
-        m_charToString = new HashMap();
-
        ResourceBundle entities = null;
        boolean noExtraEntities = true;

@ -203,10 +190,12 @@ final class CharInfo
                String name = (String) keys.nextElement();
                String value = entities.getString(name);
                int code = Integer.parseInt(value);
-                boolean extra = defineEntity(name, (char) code);
-                if (extra)
+                defineEntity(name, (char) code);
+                if (extraEntity(code))
                    noExtraEntities = false;
            }
+            set(S_LINEFEED);
+            set(S_CARRIAGERETURN);
        } else {
            InputStream is = null;

@ -290,8 +279,8 @@ final class CharInfo

                            int code = Integer.parseInt(value);

-                            boolean extra = defineEntity(name, (char) code);
-                            if (extra)
+                            defineEntity(name, (char) code);
+                            if (extraEntity(code))
                                noExtraEntities = false;
                        }
                    }
@ -300,6 +289,8 @@ final class CharInfo
                }

                is.close();
+                set(S_LINEFEED);
+                set(S_CARRIAGERETURN);
            } catch (Exception e) {
                throw new RuntimeException(
                    Utils.messages.createMessage(
@ -317,8 +308,31 @@ final class CharInfo
            }
        }

+        /* initialize the array isCleanTextASCII[] with a cache of values
+         * for use by ToStream.character(char[], int , int)
+         * and the array isSpecialTextASCII[] with the opposite values
+         * (all in the name of performance!)
+         */
+        for (int ch = 0; ch <ASCII_MAX; ch++)
+        if((((0x20 <= ch || (0x0A == ch || 0x0D == ch || 0x09 == ch)))
+             && (!get(ch))) || ('"' == ch))
+        {
+            isCleanTextASCII[ch] = true;
+            isSpecialTextASCII[ch] = false;
+        }
+        else {
+            isCleanTextASCII[ch] = false;
+            isSpecialTextASCII[ch] = true;
+        }
+
+
+
        onlyQuotAmpLtGt = noExtraEntities;

+        // initialize the array with a cache of the BitSet values
+        for (int i=0; i<ASCII_MAX; i++)
+            isSpecialAttrASCII[i] = get(i);
+
        /* Now that we've used get(ch) just above to initialize the
         * two arrays we will change by adding a tab to the set of
         * special chars for XML (but not HTML!).
@ -330,19 +344,8 @@ final class CharInfo
         */
        if (Method.XML.equals(method))
        {
-            // We choose not to escape the quotation mark as &quot; in text nodes
-            shouldMapTextChar_ASCII[S_QUOTE] = false;
+            isSpecialAttrASCII[S_HORIZONAL_TAB] = true;
        }
-
-        if (Method.HTML.equals(method)) {
-                // The XSLT 1.0 recommendation says
-                // "The html output method should not escape < characters occurring in attribute values."
-                // So we don't escape '<' in an attribute for HTML
-                shouldMapAttrChar_ASCII['<'] = false;
-
-                // We choose not to escape the quotation mark as &quot; in text nodes.
-            shouldMapTextChar_ASCII[S_QUOTE] = false;
-    }
    }

    /**
@ -350,38 +353,22 @@ final class CharInfo
     * supplied. Nothing happens if the character reference is already defined.
     * <p>Unlike internal entities, character references are a string to single
     * character mapping. They are used to map non-ASCII characters both on
-     * parsing and printing, primarily for HTML documents. '&amp;lt;' is an
+     * parsing and printing, primarily for HTML documents. '&lt;amp;' is an
     * example of a character reference.</p>
     *
     * @param name The entity's name
     * @param value The entity's value
-     * @return true if the mapping is not one of:
-     * <ul>
-     * <li> '<' to "&lt;"
-     * <li> '>' to "&gt;"
-     * <li> '&' to "&amp;"
-     * <li> '"' to "&quot;"
-     * </ul>
     */
-    private boolean defineEntity(String name, char value)
+    private void defineEntity(String name, char value)
    {
        StringBuilder sb = new StringBuilder("&");
        sb.append(name);
        sb.append(';');
        String entityString = sb.toString();

-        boolean extra = defineChar2StringMapping(entityString, value);
-        return extra;
+        defineChar2StringMapping(entityString, value);
    }

-    /**
-     * A utility object, just used to map characters to output Strings,
-     * needed because a HashMap needs to map an object as a key, not a
-     * Java primitive type, like a char, so this object gets around that
-     * and it is reusable.
-     */
-    private final CharKey m_charKey;
-
    /**
     * Map a character to a String. For example given
     * the character '>' this method would return the fully decorated
@ -413,21 +400,21 @@ final class CharInfo

    /**
     * Tell if the character argument that is from
-     * an attribute value has a mapping to a String.
+     * an attribute value should have special treatment.
     *
     * @param value the value of a character that is in an attribute value
     * @return true if the character should have any special treatment,
     * such as when writing out attribute values,
-     * such as when writing out entity references.
+     * or entity references.
     * @xsl.usage internal
     */
-    final boolean shouldMapAttrChar(int value)
+    final boolean isSpecialAttrChar(int value)
    {
        // for performance try the values in the boolean array first,
        // this is faster access than the BitSet for common ASCII values

        if (value < ASCII_MAX)
-            return shouldMapAttrChar_ASCII[value];
+            return isSpecialAttrASCII[value];

        // rather than java.util.BitSet, our private
        // implementation is faster (and less general).
@ -436,27 +423,48 @@ final class CharInfo

    /**
     * Tell if the character argument that is from a
-     * text node has a mapping to a String, for example
-     * to map '<' to "&lt;".
+     * text node should have special treatment.
     *
     * @param value the value of a character that is in a text node
-     * @return true if the character has a mapping to a String,
-     * such as when writing out entity references.
+     * @return true if the character should have any special treatment,
+     * such as when writing out attribute values,
+     * or entity references.
     * @xsl.usage internal
     */
-    final boolean shouldMapTextChar(int value)
+    final boolean isSpecialTextChar(int value)
    {
        // for performance try the values in the boolean array first,
        // this is faster access than the BitSet for common ASCII values

        if (value < ASCII_MAX)
-            return shouldMapTextChar_ASCII[value];
+            return isSpecialTextASCII[value];

        // rather than java.util.BitSet, our private
        // implementation is faster (and less general).
        return get(value);
    }

+    /**
+     * This method is used to determine if an ASCII character in
+     * a text node (not an attribute value) is "clean".
+     * @param value the character to check (0 to 127).
+     * @return true if the character can go to the writer as-is
+     * @xsl.usage internal
+     */
+    final boolean isTextASCIIClean(int value)
+    {
+        return isCleanTextASCII[value];
+    }
+
+//  In the future one might want to use the array directly and avoid
+//  the method call, but I think the JIT alreay inlines this well enough
+//  so don't do it (for now) - bjm
+//    public final boolean[] getASCIIClean()
+//    {
+//        return isCleanTextASCII;
+//    }
+
+
    private static CharInfo getCharInfoBasedOnPrivilege(
        final String entitiesFileName, final String method,
        final boolean internal){
@ -491,17 +499,15 @@ final class CharInfo
    {
        CharInfo charInfo = (CharInfo) m_getCharInfoCache.get(entitiesFileName);
        if (charInfo != null) {
-            return mutableCopyOf(charInfo);
+            return charInfo;
        }

        // try to load it internally - cache
        try {
            charInfo = getCharInfoBasedOnPrivilege(entitiesFileName,
                                        method, true);
-            // Put the common copy of charInfo in the cache, but return
-            // a copy of it.
            m_getCharInfoCache.put(entitiesFileName, charInfo);
-            return mutableCopyOf(charInfo);
+            return charInfo;
        } catch (Exception e) {}

        // try to load it externally - do not cache
@ -528,41 +534,7 @@ final class CharInfo
                                method, false);
    }

-    /**
-     * Create a mutable copy of the cached one.
-     * @param charInfo The cached one.
-     * @return
-     */
-    private static CharInfo mutableCopyOf(CharInfo charInfo) {
-        CharInfo copy = new CharInfo();
-
-        int max = charInfo.array_of_bits.length;
-        System.arraycopy(charInfo.array_of_bits,0,copy.array_of_bits,0,max);
-
-        copy.firstWordNotUsed = charInfo.firstWordNotUsed;
-
-        max = charInfo.shouldMapAttrChar_ASCII.length;
-        System.arraycopy(charInfo.shouldMapAttrChar_ASCII,0,copy.shouldMapAttrChar_ASCII,0,max);
-
-        max = charInfo.shouldMapTextChar_ASCII.length;
-        System.arraycopy(charInfo.shouldMapTextChar_ASCII,0,copy.shouldMapTextChar_ASCII,0,max);
-
-        // utility field copy.m_charKey is already created in the default constructor
-
-        copy.m_charToString = (HashMap) charInfo.m_charToString.clone();
-
-        copy.onlyQuotAmpLtGt = charInfo.onlyQuotAmpLtGt;
-
-                return copy;
-        }
-
-        /**
-         * Table of user-specified char infos.
-         * The table maps entify file names (the name of the
-         * property file without the .properties extension)
-         * to CharInfo objects populated with entities defined in
-         * corresponding property file.
-         */
+    /** Table of user-specified char infos. */
    private static HashMap m_getCharInfoCache = new HashMap();

    /**
@ -604,8 +576,7 @@ final class CharInfo
     * the creation of the set.
     */
    private final void set(int i) {
-        setASCIItextDirty(i);
-        setASCIIattrDirty(i);
+        setASCIIdirty(i);

        int j = (i >> SHIFT_PER_WORD); // this word is used
        int k = j + 1;
@ -640,43 +611,24 @@ final class CharInfo
        return in_the_set;
    }

+    // record if there are any entities other than
+    // quot, amp, lt, gt  (probably user defined)
    /**
-     * This method returns true if there are some non-standard mappings to
-     * entities other than quot, amp, lt, gt, and its only purpose is for
-     * performance.
-     * @param charToMap The value of the character that is mapped to a String
-     * @param outputString The String to which the character is mapped, usually
-     * an entity reference such as "&lt;".
-     * @return true if the mapping is not one of:
-     * <ul>
-     * <li> '<' to "&lt;"
-     * <li> '>' to "&gt;"
-     * <li> '&' to "&amp;"
-     * <li> '"' to "&quot;"
-     * </ul>
+     * @return true if the entity
+     * @param code The value of the character that has an entity defined
+     * for it.
     */
-    private boolean extraEntity(String outputString, int charToMap)
+    private boolean extraEntity(int entityValue)
    {
        boolean extra = false;
-        if (charToMap < ASCII_MAX)
+        if (entityValue < 128)
        {
-            switch (charToMap)
+            switch (entityValue)
            {
-                case '"' : // quot
-                        if (!outputString.equals("&quot;"))
-                                extra = true;
-                    break;
-                case '&' : // amp
-                        if (!outputString.equals("&amp;"))
-                                extra = true;
-                        break;
-                case '<' : // lt
-                        if (!outputString.equals("&lt;"))
-                                extra = true;
-                        break;
-                case '>' : // gt
-                        if (!outputString.equals("&gt;"))
-                                extra = true;
+                case 34 : // quot
+                case 38 : // amp
+                case 60 : // lt
+                case 62 : // gt
                    break;
                default : // other entity in range 0 to 127
                    extra = true;
@ -686,61 +638,49 @@ final class CharInfo
    }

    /**
-     * If the character is in the ASCII range then
-     * mark it as needing replacement with
-     * a String on output if it occurs in a text node.
+     * If the character is a printable ASCII character then
+     * mark it as not clean and needing replacement with
+     * a String on output.
     * @param ch
     */
-    private void setASCIItextDirty(int j)
+    private void setASCIIdirty(int j)
    {
        if (0 <= j && j < ASCII_MAX)
        {
-            shouldMapTextChar_ASCII[j] = true;
+            isCleanTextASCII[j] = false;
+            isSpecialTextASCII[j] = true;
        }
    }

    /**
-     * If the character is in the ASCII range then
-     * mark it as needing replacement with
-     * a String on output if it occurs in a attribute value.
+     * If the character is a printable ASCII character then
+     * mark it as and not needing replacement with
+     * a String on output.
     * @param ch
     */
-    private void setASCIIattrDirty(int j)
+    private void setASCIIclean(int j)
    {
        if (0 <= j && j < ASCII_MAX)
        {
-            shouldMapAttrChar_ASCII[j] = true;
+            isCleanTextASCII[j] = true;
+            isSpecialTextASCII[j] = false;
        }
    }

-    /**
-     * Call this method to register a char to String mapping, for example
-     * to map '<' to "&lt;".
-     * @param outputString The String to map to.
-     * @param inputChar The char to map from.
-     * @return true if the mapping is not one of:
-     * <ul>
-     * <li> '<' to "&lt;"
-     * <li> '>' to "&gt;"
-     * <li> '&' to "&amp;"
-     * <li> '"' to "&quot;"
-     * </ul>
-     */
-    boolean defineChar2StringMapping(String outputString, char inputChar)
+    private void defineChar2StringMapping(String outputString, char inputChar)
    {
        CharKey character = new CharKey(inputChar);
        m_charToString.put(character, outputString);
-        set(inputChar);  // mark the character has having a mapping to a String
-
-        boolean extraMapping = extraEntity(outputString, inputChar);
-        return extraMapping;
-
+        set(inputChar);
    }

    /**
     * Simple class for fast lookup of char values, when used with
     * hashtables.  You can set the char, then use it as a key.
     *
+     * This class is a copy of the one in com.sun.org.apache.xml.internal.utils.
+     * It exists to cut the serializers dependancy on that package.
+     *
     * @xsl.usage internal
     */
    private static class CharKey extends Object
--- a/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToHTMLStream.java
+++ b/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToHTMLStream.java
@ -58,7 +58,7 @@ public final class ToHTMLStream extends ToStream
     * Map that tells which XML characters should have special treatment, and it
     *  provides character to entity name lookup.
     */
-    private final CharInfo m_htmlcharInfo =
+    private static final CharInfo m_htmlcharInfo =
 //        new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE);
        CharInfo.getCharInfo(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);

@ -1369,7 +1369,7 @@ public final class ToHTMLStream extends ToStream
            // System.out.println("ch: "+(int)ch);
            // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
            // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
-            if (escapingNotNeeded(ch) && (!m_charInfo.shouldMapAttrChar(ch)))
+            if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
            {
                cleanLength++;
            }
--- a/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToStream.java
+++ b/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToStream.java
@ -919,8 +919,7 @@ abstract public class ToStream extends SerializerBase
        {
            // This is the old/fast code here, but is this
            // correct for all encodings?
-            if (ch >= CharInfo.S_SPACE || (CharInfo.S_LINEFEED == ch ||
-                    CharInfo.S_CARRIAGERETURN == ch || CharInfo.S_HORIZONAL_TAB == ch))
+            if (ch >= 0x20 || (0x0A == ch || 0x0D == ch || 0x09 == ch))
                ret= true;
            else
                ret = false;
@ -1029,7 +1028,7 @@ abstract public class ToStream extends SerializerBase
     *
     * @throws java.io.IOException
     */
-    int accumDefaultEntity(
+    protected int accumDefaultEntity(
        java.io.Writer writer,
        char ch,
        int i,
@ -1048,7 +1047,7 @@ abstract public class ToStream extends SerializerBase
        {
            // if this is text node character and a special one of those,
            // or if this is a character from attribute value and a special one of those
-            if ((fromTextNode && m_charInfo.shouldMapTextChar(ch)) || (!fromTextNode && m_charInfo.shouldMapAttrChar(ch)))
+            if ((fromTextNode && m_charInfo.isSpecialTextChar(ch)) || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch)))
            {
                String outputStringForChar = m_charInfo.getOutputStringForChar(ch);

@ -1399,6 +1398,7 @@ abstract public class ToStream extends SerializerBase

        if (m_cdataTagOpen)
            closeCDATA();
+        // the check with _escaping is a bit of a hack for XLSTC

        if (m_disableOutputEscapingStates.peekOrFalse() || (!m_escaping))
        {
@ -1421,173 +1421,82 @@ abstract public class ToStream extends SerializerBase
        try
        {
            int i;
+            char ch1;
            int startClean;

            // skip any leading whitspace
            // don't go off the end and use a hand inlined version
            // of isWhitespace(ch)
            final int end = start + length;
-            int lastDirtyCharProcessed = start - 1; // last non-clean character that was processed
-                                                                                                        // that was processed
-            final Writer writer = m_writer;
-            boolean isAllWhitespace = true;
-
-            // process any leading whitspace
-            i = start;
-            while (i < end && isAllWhitespace) {
-                char ch1 = chars[i];
-
-                if (m_charInfo.shouldMapTextChar(ch1)) {
-                    // The character is supposed to be replaced by a String
-                    // so write out the clean whitespace characters accumulated
-                    // so far
-                    // then the String.
-                    writeOutCleanChars(chars, i, lastDirtyCharProcessed);
-                    String outputStringForChar = m_charInfo
-                            .getOutputStringForChar(ch1);
-                    writer.write(outputStringForChar);
-                    // We can't say that everything we are writing out is
-                    // all whitespace, we just wrote out a String.
-                    isAllWhitespace = false;
-                    lastDirtyCharProcessed = i; // mark the last non-clean
-                    // character processed
-                    i++;
-                } else {
-                    // The character is clean, but is it a whitespace ?
-                    switch (ch1) {
-                    // TODO: Any other whitespace to consider?
-                    case CharInfo.S_SPACE:
-                        // Just accumulate the clean whitespace
-                        i++;
-                        break;
-                    case CharInfo.S_LINEFEED:
-                        lastDirtyCharProcessed = processLineFeed(chars, i,
-                                lastDirtyCharProcessed, writer);
-                        i++;
-                        break;
-                    case CharInfo.S_CARRIAGERETURN:
-                        writeOutCleanChars(chars, i, lastDirtyCharProcessed);
-                        writer.write("&#13;");
-                        lastDirtyCharProcessed = i;
-                        i++;
-                        break;
-                    case CharInfo.S_HORIZONAL_TAB:
-                        // Just accumulate the clean whitespace
-                        i++;
-                        break;
-                    default:
-                        // The character was clean, but not a whitespace
-                        // so break the loop to continue with this character
-                        // (we don't increment index i !!)
-                        isAllWhitespace = false;
-                        break;
+            int lastDirty = start - 1; // last character that needed processing
+            for (i = start;
+                ((i < end)
+                    && ((ch1 = chars[i]) == 0x20
+                        || (ch1 == 0xA && m_lineSepUse)
+                        || ch1 == 0xD
+                        || ch1 == 0x09));
+                i++)
+            {
+                /*
+                 * We are processing leading whitespace, but are doing the same
+                 * processing for dirty characters here as for non-whitespace.
+                 *
+                 */
+                if (!m_charInfo.isTextASCIIClean(ch1))
+                {
+                    lastDirty = processDirty(chars,end, i,ch1, lastDirty, true);
+                    i = lastDirty;
                }
            }
-            }
            /* If there is some non-whitespace, mark that we may need
             * to preserve this. This is only important if we have indentation on.
             */
-            if (i < end || !isAllWhitespace)
+            if (i < end)
                m_ispreserve = true;

+
+//            int lengthClean;    // number of clean characters in a row
+//            final boolean[] isAsciiClean = m_charInfo.getASCIIClean();
+
+            final boolean isXML10 = XMLVERSION10.equals(getVersion());
+            // we've skipped the leading whitespace, now deal with the rest
            for (; i < end; i++)
            {
-                char ch = chars[i];
-
-                if (m_charInfo.shouldMapTextChar(ch)) {
-                    // The character is supposed to be replaced by a String
-                    // e.g.   '&'  -->  "&amp;"
-                    // e.g.   '<'  -->  "&lt;"
-                    writeOutCleanChars(chars, i, lastDirtyCharProcessed);
-                    String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
-                    writer.write(outputStringForChar);
-                    lastDirtyCharProcessed = i;
-                }
-                else {
-                    if (ch <= 0x1F) {
-                        // Range 0x00 through 0x1F inclusive
-                        //
-                        // This covers the non-whitespace control characters
-                        // in the range 0x1 to 0x1F inclusive.
-                        // It also covers the whitespace control characters in the same way:
-                        // 0x9   TAB
-                        // 0xA   NEW LINE
-                        // 0xD   CARRIAGE RETURN
-                        //
-                        // We also cover 0x0 ... It isn't valid
-                        // but we will output "&#0;"
-
-                        // The default will handle this just fine, but this
-                        // is a little performance boost to handle the more
-                        // common TAB, NEW-LINE, CARRIAGE-RETURN
-                        switch (ch) {
-
-                        case CharInfo.S_HORIZONAL_TAB:
-                            // Leave whitespace TAB as a real character
+                {
+                    // A tight loop to skip over common clean chars
+                    // This tight loop makes it easier for the JIT
+                    // to optimize.
+                    char ch2;
+                    while (i<end
+                            && ((ch2 = chars[i])<127)
+                            && m_charInfo.isTextASCIIClean(ch2))
+                            i++;
+                    if (i == end)
                        break;
-                        case CharInfo.S_LINEFEED:
-                            lastDirtyCharProcessed = processLineFeed(chars, i, lastDirtyCharProcessed, writer);
-                            break;
-                        case CharInfo.S_CARRIAGERETURN:
-                                writeOutCleanChars(chars, i, lastDirtyCharProcessed);
-                                writer.write("&#13;");
-                                lastDirtyCharProcessed = i;
-                            // Leave whitespace carriage return as a real character
-                            break;
-                        default:
-                            writeOutCleanChars(chars, i, lastDirtyCharProcessed);
-                            writer.write("&#");
-                            writer.write(Integer.toString(ch));
-                            writer.write(';');
-                            lastDirtyCharProcessed = i;
-                            break;
-
                }
-                    }
-                    else if (ch < 0x7F) {
-                        // Range 0x20 through 0x7E inclusive
-                        // Normal ASCII chars, do nothing, just add it to
-                        // the clean characters

+                final char ch = chars[i];
+                /*  The check for isCharacterInC0orC1Ranger and
+                 *  isNELorLSEPCharacter has been added
+                 *  to support Control Characters in XML 1.1
+                 */
+                if (!isCharacterInC0orC1Range(ch) &&
+                    (isXML10 || !isNELorLSEPCharacter(ch)) &&
+                    (escapingNotNeeded(ch) && (!m_charInfo.isSpecialTextChar(ch)))
+                        || ('"' == ch))
+                {
+                    ; // a character needing no special processing
                }
-                    else if (ch <= 0x9F){
-                        // Range 0x7F through 0x9F inclusive
-                        // More control characters, including NEL (0x85)
-                        writeOutCleanChars(chars, i, lastDirtyCharProcessed);
-                        writer.write("&#");
-                        writer.write(Integer.toString(ch));
-                        writer.write(';');
-                        lastDirtyCharProcessed = i;
-                }
-                    else if (ch == CharInfo.S_LINE_SEPARATOR) {
-                        // LINE SEPARATOR
-                        writeOutCleanChars(chars, i, lastDirtyCharProcessed);
-                        writer.write("&#8232;");
-                        lastDirtyCharProcessed = i;
-            }
-                    else if (m_encodingInfo.isInEncoding(ch)) {
-                        // If the character is in the encoding, and
-                        // not in the normal ASCII range, we also
-                        // just leave it get added on to the clean characters
-
-                    }
-                    else {
-                        // This is a fallback plan, we should never get here
-                        // but if the character wasn't previously handled
-                        // (i.e. isn't in the encoding, etc.) then what
-                        // should we do?  We choose to write out an entity
-                        writeOutCleanChars(chars, i, lastDirtyCharProcessed);
-                        writer.write("&#");
-                        writer.write(Integer.toString(ch));
-                        writer.write(';');
-                        lastDirtyCharProcessed = i;
-                    }
+                else
+                {
+                    lastDirty = processDirty(chars,end, i, ch, lastDirty, true);
+                    i = lastDirty;
                }
            }

            // we've reached the end. Any clean characters at the
            // end of the array than need to be written out?
-            startClean = lastDirtyCharProcessed + 1;
+            startClean = lastDirty + 1;
            if (i > startClean)
            {
                int lengthClean = i - startClean;
@ -1606,32 +1515,6 @@ abstract public class ToStream extends SerializerBase
        if (m_tracer != null)
            super.fireCharEvent(chars, start, length);
    }
-
-        private int processLineFeed(final char[] chars, int i, int lastProcessed, final Writer writer) throws IOException {
-                if (!m_lineSepUse
-                || (m_lineSepLen ==1 && m_lineSep[0] == CharInfo.S_LINEFEED)){
-                    // We are leaving the new-line alone, and it is just
-                    // being added to the 'clean' characters,
-                        // so the last dirty character processed remains unchanged
-                }
-                else {
-                    writeOutCleanChars(chars, i, lastProcessed);
-                    writer.write(m_lineSep, 0, m_lineSepLen);
-                    lastProcessed = i;
-                }
-                return lastProcessed;
-        }
-
-    private void writeOutCleanChars(final char[] chars, int i, int lastProcessed) throws IOException {
-        int startClean;
-        startClean = lastProcessed + 1;
-        if (startClean < i)
-        {
-            int lengthClean = i - startClean;
-            m_writer.write(chars, startClean, lengthClean);
-        }
-     }
-
    /**
     * This method checks if a given character is between C0 or C1 range
     * of Control characters.
@ -1751,7 +1634,7 @@ abstract public class ToStream extends SerializerBase
     *
     * @throws org.xml.sax.SAXException
     */
-    private int accumDefaultEscape(
+    protected int accumDefaultEscape(
        Writer writer,
        char ch,
        int i,
@ -1815,15 +1698,16 @@ abstract public class ToStream extends SerializerBase
                 *  to write it out as Numeric Character Reference(NCR) regardless of XML Version
                 *  being used for output document.
                 */
-                if (isCharacterInC0orC1Range(ch) || isNELorLSEPCharacter(ch))
+                if (isCharacterInC0orC1Range(ch) ||
+                        (XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch)))
                {
                    writer.write("&#");
                    writer.write(Integer.toString(ch));
                    writer.write(';');
                }
                else if ((!escapingNotNeeded(ch) ||
-                    (  (fromTextNode && m_charInfo.shouldMapTextChar(ch))
-                     || (!fromTextNode && m_charInfo.shouldMapAttrChar(ch))))
+                    (  (fromTextNode && m_charInfo.isSpecialTextChar(ch))
+                     || (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
                && m_elemContext.m_currentElemDepth > 0)
                {
                    writer.write("&#");
@ -2087,86 +1971,28 @@ abstract public class ToStream extends SerializerBase
        string.getChars(0,len, m_attrBuff, 0);
        final char[] stringChars = m_attrBuff;

-        for (int i = 0; i < len;)
+        for (int i = 0; i < len; )
        {
            char ch = stringChars[i];
+            if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
+            {
+                writer.write(ch);
+                i++;
+            }
+            else
+            { // I guess the parser doesn't normalize cr/lf in attributes. -sb
+//                if ((CharInfo.S_CARRIAGERETURN == ch)
+//                    && ((i + 1) < len)
+//                    && (CharInfo.S_LINEFEED == stringChars[i + 1]))
+//                {
+//                    i++;
+//                    ch = CharInfo.S_LINEFEED;
+//                }

-            if (m_charInfo.shouldMapAttrChar(ch) || !(escapingNotNeeded(ch))) {
-                // The character is supposed to be replaced by a String
-                // e.g.   '&'  -->  "&amp;"
-                // e.g.   '<'  -->  "&lt;"
                i = accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
            }
-            else {
-                i++;
-                if (0x0 <= ch && ch <= 0x1F) {
-                    // Range 0x00 through 0x1F inclusive
-                    // This covers the non-whitespace control characters
-                    // in the range 0x1 to 0x1F inclusive.
-                    // It also covers the whitespace control characters in the same way:
-                    // 0x9   TAB
-                    // 0xA   NEW LINE
-                    // 0xD   CARRIAGE RETURN
-                    //
-                    // We also cover 0x0 ... It isn't valid
-                    // but we will output "&#0;"
-
-                    // The default will handle this just fine, but this
-                    // is a little performance boost to handle the more
-                    // common TAB, NEW-LINE, CARRIAGE-RETURN
-                    switch (ch) {
-
-                    case CharInfo.S_HORIZONAL_TAB:
-                        writer.write("&#9;");
-                        break;
-                    case CharInfo.S_LINEFEED:
-                        writer.write("&#10;");
-                        break;
-                    case CharInfo.S_CARRIAGERETURN:
-                        writer.write("&#13;");
-                        break;
-                    default:
-                        writer.write("&#");
-                        writer.write(Integer.toString(ch));
-                        writer.write(';');
-                        break;
-
        }
-                }
-                else if (ch < 0x7F) {
-                    // Range 0x20 through 0x7E inclusive
-                    // Normal ASCII chars
-                        writer.write(ch);
-                }
-                else if (ch <= 0x9F){
-                    // Range 0x7F through 0x9F inclusive
-                    // More control characters
-                    writer.write("&#");
-                    writer.write(Integer.toString(ch));
-                    writer.write(';');
-                }
-                else if (ch == CharInfo.S_LINE_SEPARATOR) {
-                    // LINE SEPARATOR
-                    writer.write("&#8232;");
-                }
-                else if (m_encodingInfo.isInEncoding(ch)) {
-                    // If the character is in the encoding, and
-                    // not in the normal ASCII range, we also
-                    // just write it out
-                    writer.write(ch);
-                }
-                else {
-                    // This is a fallback plan, we should never get here
-                    // but if the character wasn't previously handled
-                    // (i.e. isn't in the encoding, etc.) then what
-                    // should we do?  We choose to write out a character ref
-                    writer.write("&#");
-                    writer.write(Integer.toString(ch));
-                    writer.write(';');
-                }

-    }
-        }
    }

    /**
@ -2936,14 +2762,6 @@ abstract public class ToStream extends SerializerBase
                closeCDATA();
                m_cdataTagOpen = false;
            }
-            if (m_writer != null) {
-                try {
-                    m_writer.flush();
-    }
-                catch(IOException e) {
-                    // what? me worry?
-                }
-            }
    }

    public void setContentHandler(ContentHandler ch)
--- a/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToXMLStream.java
+++ b/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToXMLStream.java
@ -56,7 +56,7 @@ public final class ToXMLStream extends ToStream
     * Map that tells which XML characters should have special treatment, and it
     *  provides character to entity name lookup.
     */
-    private CharInfo m_xmlcharInfo =
+    private static CharInfo m_xmlcharInfo =
 //      new CharInfo(CharInfo.XML_ENTITIES_RESOURCE);
        CharInfo.getCharInfo(CharInfo.XML_ENTITIES_RESOURCE, Method.XML);

@ -329,11 +329,12 @@ public final class ToXMLStream extends ToStream

                /**
                 * Before Xalan 1497, a newline char was printed out if not inside of an
-                 * element. The whitespace is not significant if the output is standalone
+                 * element. The whitespace is not significant if the output is standalone
                */
                if (m_elemContext.m_currentElemDepth <= 0 && m_isStandalone)
                    writer.write(m_lineSep, 0, m_lineSepLen);

+
                /*
                 * Don't write out any indentation whitespace now,
                 * because there may be non-whitespace text after this.