8207760: SAXException: Invalid UTF-16 surrogate detected: d83c ?
Reviewed-by: lancea, dfuchs
This commit is contained in:
parent
b6180e668e
commit
9ed646a020
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
@ -40,6 +40,7 @@ import com.sun.org.apache.xml.internal.serializer.utils.Utils;
|
||||
* because it is used from another package.
|
||||
*
|
||||
* @xsl.usage internal
|
||||
* @LastModified: Sept 2018
|
||||
*/
|
||||
public final class ToHTMLStream extends ToStream
|
||||
{
|
||||
@ -1049,7 +1050,7 @@ public final class ToHTMLStream extends ToStream
|
||||
String name,
|
||||
String value,
|
||||
ElemDesc elemDesc)
|
||||
throws IOException
|
||||
throws IOException, SAXException
|
||||
{
|
||||
writer.write(' ');
|
||||
|
||||
@ -1373,7 +1374,7 @@ public final class ToHTMLStream extends ToStream
|
||||
*/
|
||||
public void writeAttrString(
|
||||
final java.io.Writer writer, String string, String encoding)
|
||||
throws IOException
|
||||
throws IOException, SAXException
|
||||
{
|
||||
final int end = string.length();
|
||||
if (end > m_attrBuff.length)
|
||||
@ -1425,13 +1426,16 @@ public final class ToHTMLStream extends ToStream
|
||||
}
|
||||
else
|
||||
{
|
||||
if (Encodings.isHighUTF16Surrogate(ch))
|
||||
if (Encodings.isHighUTF16Surrogate(ch) ||
|
||||
Encodings.isLowUTF16Surrogate(ch))
|
||||
{
|
||||
|
||||
writeUTF16Surrogate(ch, chars, i, end);
|
||||
i++; // two input characters processed
|
||||
// this increments by one and the for()
|
||||
// loop itself increments by another one.
|
||||
if (writeUTF16Surrogate(ch, chars, i, end) >= 0) {
|
||||
// move the index if the low surrogate is consumed
|
||||
// as writeUTF16Surrogate has written the pair
|
||||
if (Encodings.isHighUTF16Surrogate(ch)) {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The next is kind of a hack to keep from escaping in the case
|
||||
|
@ -51,7 +51,7 @@ import org.xml.sax.SAXException;
|
||||
* serializers (xml, html, text ...) that write output to a stream.
|
||||
*
|
||||
* @xsl.usage internal
|
||||
* @LastModified: Feb 2018
|
||||
* @LastModified: Sept 2018
|
||||
*/
|
||||
abstract public class ToStream extends SerializerBase {
|
||||
|
||||
@ -193,6 +193,8 @@ abstract public class ToStream extends SerializerBase {
|
||||
*/
|
||||
private boolean m_expandDTDEntities = true;
|
||||
|
||||
private char m_highSurrogate = 0;
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*/
|
||||
@ -953,45 +955,46 @@ abstract public class ToStream extends SerializerBase {
|
||||
* @param ch Character array.
|
||||
* @param i position Where the surrogate was detected.
|
||||
* @param end The end index of the significant characters.
|
||||
* @return 0 if the pair of characters was written out as-is,
|
||||
* the unicode code point of the character represented by
|
||||
* the surrogate pair if an entity reference with that value
|
||||
* was written out.
|
||||
* @return the status of writing a surrogate pair.
|
||||
* -1 -- nothing is written
|
||||
* 0 -- the pair is written as-is
|
||||
* code point -- the pair is written as an entity reference
|
||||
*
|
||||
* @throws IOException
|
||||
* @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected.
|
||||
*/
|
||||
protected int writeUTF16Surrogate(char c, char ch[], int i, int end)
|
||||
throws IOException
|
||||
throws IOException, SAXException
|
||||
{
|
||||
int codePoint = 0;
|
||||
int status = -1;
|
||||
if (i + 1 >= end)
|
||||
{
|
||||
throw new IOException(
|
||||
Utils.messages.createMessage(
|
||||
MsgKey.ER_INVALID_UTF16_SURROGATE,
|
||||
new Object[] { Integer.toHexString((int) c)}));
|
||||
m_highSurrogate = c;
|
||||
return status;
|
||||
}
|
||||
|
||||
char high, low;
|
||||
if (m_highSurrogate == 0) {
|
||||
high = c;
|
||||
low = ch[i+1];
|
||||
status = 0;
|
||||
} else {
|
||||
high = m_highSurrogate;
|
||||
low = c;
|
||||
m_highSurrogate = 0;
|
||||
}
|
||||
|
||||
final char high = c;
|
||||
final char low = ch[i+1];
|
||||
if (!Encodings.isLowUTF16Surrogate(low)) {
|
||||
throw new IOException(
|
||||
Utils.messages.createMessage(
|
||||
MsgKey.ER_INVALID_UTF16_SURROGATE,
|
||||
new Object[] {
|
||||
Integer.toHexString((int) c)
|
||||
+ " "
|
||||
+ Integer.toHexString(low)}));
|
||||
throwIOE(high, low);
|
||||
}
|
||||
|
||||
final Writer writer = m_writer;
|
||||
|
||||
// If we make it to here we have a valid high, low surrogate pair
|
||||
if (m_encodingInfo.isInEncoding(c,low)) {
|
||||
if (m_encodingInfo.isInEncoding(high,low)) {
|
||||
// If the character formed by the surrogate pair
|
||||
// is in the encoding, so just write it out
|
||||
writer.write(ch,i,2);
|
||||
writer.write(new char[]{high, low}, 0, 2);
|
||||
}
|
||||
else {
|
||||
// Don't know what to do with this char, it is
|
||||
@ -999,24 +1002,16 @@ abstract public class ToStream extends SerializerBase {
|
||||
// a surrogate pair, so write out as an entity ref
|
||||
final String encoding = getEncoding();
|
||||
if (encoding != null) {
|
||||
/* The output encoding is known,
|
||||
* so something is wrong.
|
||||
*/
|
||||
codePoint = Encodings.toCodePoint(high, low);
|
||||
// not in the encoding, so write out a character reference
|
||||
writer.write('&');
|
||||
writer.write('#');
|
||||
writer.write(Integer.toString(codePoint));
|
||||
writer.write(';');
|
||||
status = writeCharRef(writer, high, low);
|
||||
} else {
|
||||
/* The output encoding is not known,
|
||||
* so just write it out as-is.
|
||||
*/
|
||||
writer.write(ch, i, 2);
|
||||
writer.write(new char[]{high, low}, 0, 2);
|
||||
}
|
||||
}
|
||||
// non-zero only if character reference was written out.
|
||||
return codePoint;
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1106,32 +1101,7 @@ abstract public class ToStream extends SerializerBase {
|
||||
}
|
||||
else if (isCData && (!escapingNotNeeded(c)))
|
||||
{
|
||||
// if (i != 0)
|
||||
if (m_cdataTagOpen)
|
||||
closeCDATA();
|
||||
|
||||
// This needs to go into a function...
|
||||
if (Encodings.isHighUTF16Surrogate(c))
|
||||
{
|
||||
writeUTF16Surrogate(c, ch, i, end);
|
||||
i++ ; // process two input characters
|
||||
}
|
||||
else
|
||||
{
|
||||
writer.write("&#");
|
||||
|
||||
String intStr = Integer.toString((int) c);
|
||||
|
||||
writer.write(intStr);
|
||||
writer.write(';');
|
||||
}
|
||||
|
||||
// if ((i != 0) && (i < (end - 1)))
|
||||
// if (!m_cdataTagOpen && (i < (end - 1)))
|
||||
// {
|
||||
// writer.write(CDATA_DELIMITER_OPEN);
|
||||
// m_cdataTagOpen = true;
|
||||
// }
|
||||
i = handleEscaping(writer, c, ch, i, end);
|
||||
}
|
||||
else if (
|
||||
isCData
|
||||
@ -1155,31 +1125,46 @@ abstract public class ToStream extends SerializerBase {
|
||||
}
|
||||
writer.write(c);
|
||||
}
|
||||
|
||||
// This needs to go into a function...
|
||||
else if (Encodings.isHighUTF16Surrogate(c))
|
||||
{
|
||||
if (m_cdataTagOpen)
|
||||
closeCDATA();
|
||||
writeUTF16Surrogate(c, ch, i, end);
|
||||
i++; // process two input characters
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_cdataTagOpen)
|
||||
closeCDATA();
|
||||
writer.write("&#");
|
||||
|
||||
String intStr = Integer.toString((int) c);
|
||||
|
||||
writer.write(intStr);
|
||||
writer.write(';');
|
||||
else {
|
||||
i = handleEscaping(writer, c, ch, i, end);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles escaping, writes either with a surrogate pair or a character
|
||||
* reference.
|
||||
*
|
||||
* @param c the current char
|
||||
* @param ch the character array
|
||||
* @param i the current position
|
||||
* @param end the end index of the array
|
||||
* @return the next index
|
||||
*
|
||||
* @throws IOException
|
||||
* @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected.
|
||||
*/
|
||||
private int handleEscaping(Writer writer, char c, char ch[], int i, int end)
|
||||
throws IOException, SAXException {
|
||||
if (Encodings.isHighUTF16Surrogate(c) || Encodings.isLowUTF16Surrogate(c))
|
||||
{
|
||||
if (writeUTF16Surrogate(c, ch, i, end) >= 0) {
|
||||
// move the index if the low surrogate is consumed
|
||||
// as writeUTF16Surrogate has written the pair
|
||||
if (Encodings.isHighUTF16Surrogate(c)) {
|
||||
i++ ;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
writeCharRef(writer, c);
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ends an un-escaping section.
|
||||
*
|
||||
@ -1246,7 +1231,7 @@ abstract public class ToStream extends SerializerBase {
|
||||
m_elemContext.m_startTagOpen = false;
|
||||
}
|
||||
|
||||
if (shouldIndent())
|
||||
if (!m_cdataTagOpen && shouldIndent())
|
||||
indent();
|
||||
|
||||
boolean writeCDataBrackets =
|
||||
@ -1644,7 +1629,7 @@ abstract public class ToStream extends SerializerBase {
|
||||
int i,
|
||||
char ch,
|
||||
int lastDirty,
|
||||
boolean fromTextNode) throws IOException
|
||||
boolean fromTextNode) throws IOException, SAXException
|
||||
{
|
||||
int startClean = lastDirty + 1;
|
||||
// if we have some clean characters accumulated
|
||||
@ -1723,54 +1708,40 @@ abstract public class ToStream extends SerializerBase {
|
||||
int len,
|
||||
boolean fromTextNode,
|
||||
boolean escLF)
|
||||
throws IOException
|
||||
throws IOException, SAXException
|
||||
{
|
||||
|
||||
int pos = accumDefaultEntity(writer, ch, i, chars, len, fromTextNode, escLF);
|
||||
|
||||
if (i == pos)
|
||||
{
|
||||
if (m_highSurrogate != 0) {
|
||||
if (!(Encodings.isLowUTF16Surrogate(ch))) {
|
||||
throwIOE(m_highSurrogate, ch);
|
||||
}
|
||||
writeCharRef(writer, m_highSurrogate, ch);
|
||||
m_highSurrogate = 0;
|
||||
return ++pos;
|
||||
}
|
||||
|
||||
if (Encodings.isHighUTF16Surrogate(ch))
|
||||
{
|
||||
|
||||
// Should be the UTF-16 low surrogate of the high/low pair.
|
||||
char next;
|
||||
// Unicode code point formed from the high/low pair.
|
||||
int codePoint = 0;
|
||||
|
||||
if (i + 1 >= len)
|
||||
{
|
||||
throw new IOException(
|
||||
Utils.messages.createMessage(
|
||||
MsgKey.ER_INVALID_UTF16_SURROGATE,
|
||||
new Object[] { Integer.toHexString(ch)}));
|
||||
//"Invalid UTF-16 surrogate detected: "
|
||||
|
||||
//+Integer.toHexString(ch)+ " ?");
|
||||
// save for the next read
|
||||
m_highSurrogate = ch;
|
||||
pos++;
|
||||
}
|
||||
else
|
||||
{
|
||||
next = chars[++i];
|
||||
|
||||
// the next should be the UTF-16 low surrogate of the high/low pair.
|
||||
char next = chars[++i];
|
||||
if (!(Encodings.isLowUTF16Surrogate(next)))
|
||||
throw new IOException(
|
||||
Utils.messages.createMessage(
|
||||
MsgKey
|
||||
.ER_INVALID_UTF16_SURROGATE,
|
||||
new Object[] {
|
||||
Integer.toHexString(ch)
|
||||
+ " "
|
||||
+ Integer.toHexString(next)}));
|
||||
//"Invalid UTF-16 surrogate detected: "
|
||||
throwIOE(ch, next);
|
||||
|
||||
//+Integer.toHexString(ch)+" "+Integer.toHexString(next));
|
||||
codePoint = Encodings.toCodePoint(ch,next);
|
||||
writeCharRef(writer, ch, next);
|
||||
pos += 2; // count the two characters that went into writing out this entity
|
||||
}
|
||||
|
||||
writer.write("&#");
|
||||
writer.write(Integer.toString(codePoint));
|
||||
writer.write(';');
|
||||
pos += 2; // count the two characters that went into writing out this entity
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1782,18 +1753,14 @@ abstract public class ToStream extends SerializerBase {
|
||||
if (isCharacterInC0orC1Range(ch) ||
|
||||
(XMLVERSION11.equals(getVersion()) && isNELorLSEPCharacter(ch)))
|
||||
{
|
||||
writer.write("&#");
|
||||
writer.write(Integer.toString(ch));
|
||||
writer.write(';');
|
||||
writeCharRef(writer, ch);
|
||||
}
|
||||
else if ((!escapingNotNeeded(ch) ||
|
||||
( (fromTextNode && m_charInfo.isSpecialTextChar(ch))
|
||||
|| (!fromTextNode && m_charInfo.isSpecialAttrChar(ch))))
|
||||
&& m_elemContext.m_currentElemDepth > 0)
|
||||
&& m_elemContext.m_currentElemDepth > 0)
|
||||
{
|
||||
writer.write("&#");
|
||||
writer.write(Integer.toString(ch));
|
||||
writer.write(';');
|
||||
writeCharRef(writer, ch);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1806,6 +1773,45 @@ abstract public class ToStream extends SerializerBase {
|
||||
return pos;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes out a character reference.
|
||||
* @param writer the writer
|
||||
* @param c the character
|
||||
* @throws IOException
|
||||
*/
|
||||
private void writeCharRef(Writer writer, char c) throws IOException, SAXException {
|
||||
if (m_cdataTagOpen)
|
||||
closeCDATA();
|
||||
writer.write("&#");
|
||||
writer.write(Integer.toString(c));
|
||||
writer.write(';');
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes out a pair of surrogates as a character reference
|
||||
* @param writer the writer
|
||||
* @param high the high surrogate
|
||||
* @param low the low surrogate
|
||||
* @throws IOException
|
||||
*/
|
||||
private int writeCharRef(Writer writer, char high, char low) throws IOException, SAXException {
|
||||
if (m_cdataTagOpen)
|
||||
closeCDATA();
|
||||
// Unicode code point formed from the high/low pair.
|
||||
int codePoint = Encodings.toCodePoint(high, low);
|
||||
writer.write("&#");
|
||||
writer.write(Integer.toString(codePoint));
|
||||
writer.write(';');
|
||||
return codePoint;
|
||||
}
|
||||
|
||||
private void throwIOE(char ch, char next) throws IOException {
|
||||
throw new IOException(Utils.messages.createMessage(
|
||||
MsgKey.ER_INVALID_UTF16_SURROGATE,
|
||||
new Object[] {Integer.toHexString(ch) + " "
|
||||
+ Integer.toHexString(next)}));
|
||||
}
|
||||
|
||||
/**
|
||||
* Receive notification of the beginning of an element, although this is a
|
||||
* SAX method additional namespace or attribute information can occur before
|
||||
@ -2053,7 +2059,7 @@ abstract public class ToStream extends SerializerBase {
|
||||
Writer writer,
|
||||
String string,
|
||||
String encoding)
|
||||
throws IOException
|
||||
throws IOException, SAXException
|
||||
{
|
||||
final int len = string.length();
|
||||
if (len > m_attrBuff.length)
|
||||
|
@ -1,6 +1,5 @@
|
||||
/*
|
||||
* reserved comment block
|
||||
* DO NOT REMOVE OR ALTER!
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
@ -34,6 +33,7 @@ import org.xml.sax.SAXException;
|
||||
* This class converts SAX or SAX-like calls to a
|
||||
* serialized document for xsl:output method of "text".
|
||||
* @xsl.usage internal
|
||||
* @LastModified: Sept 2018
|
||||
*/
|
||||
public final class ToTextStream extends ToStream
|
||||
{
|
||||
@ -295,23 +295,32 @@ public final class ToTextStream extends ToStream
|
||||
} else if (m_encodingInfo.isInEncoding(c)) {
|
||||
writer.write(c);
|
||||
// one input char processed
|
||||
} else if (Encodings.isHighUTF16Surrogate(c)) {
|
||||
} else if (Encodings.isHighUTF16Surrogate(c) ||
|
||||
Encodings.isLowUTF16Surrogate(c)) {
|
||||
final int codePoint = writeUTF16Surrogate(c, ch, i, end);
|
||||
if (codePoint != 0) {
|
||||
// I think we can just emit the message,
|
||||
// not crash and burn.
|
||||
final String integralValue = Integer.toString(codePoint);
|
||||
final String msg = Utils.messages.createMessage(
|
||||
MsgKey.ER_ILLEGAL_CHARACTER,
|
||||
new Object[] { integralValue, encoding });
|
||||
if (codePoint >= 0) {
|
||||
// move the index if the low surrogate is consumed
|
||||
// as writeUTF16Surrogate has written the pair
|
||||
if (Encodings.isHighUTF16Surrogate(c)) {
|
||||
i++;
|
||||
}
|
||||
|
||||
//Older behavior was to throw the message,
|
||||
//but newer gentler behavior is to write a message to System.err
|
||||
//throw new SAXException(msg);
|
||||
System.err.println(msg);
|
||||
// printing to the console is not appropriate, but will leave
|
||||
// it as is for compatibility.
|
||||
if (codePoint >0) {
|
||||
// I think we can just emit the message,
|
||||
// not crash and burn.
|
||||
final String integralValue = Integer.toString(codePoint);
|
||||
final String msg = Utils.messages.createMessage(
|
||||
MsgKey.ER_ILLEGAL_CHARACTER,
|
||||
new Object[] { integralValue, encoding });
|
||||
|
||||
//Older behavior was to throw the message,
|
||||
//but newer gentler behavior is to write a message to System.err
|
||||
//throw new SAXException(msg);
|
||||
System.err.println(msg);
|
||||
}
|
||||
}
|
||||
i++; // two input chars processed
|
||||
} else {
|
||||
// Don't know what to do with this char, it is
|
||||
// not in the encoding and not a high char in
|
||||
|
175
test/jaxp/javax/xml/jaxp/unittest/transform/JDK8207760.java
Normal file
175
test/jaxp/javax/xml/jaxp/unittest/transform/JDK8207760.java
Normal file
@ -0,0 +1,175 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package transform;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.StringReader;
|
||||
import java.io.StringWriter;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerException;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
import javax.xml.transform.stream.StreamSource;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Listeners;
|
||||
import org.testng.annotations.Test;
|
||||
import java.util.Random;
|
||||
import javax.xml.transform.OutputKeys;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @library /javax/xml/jaxp/libs /javax/xml/jaxp/unittest
|
||||
* @run testng/othervm transform.JDK8207760
|
||||
* @summary Verifies that a surrogate pair at the edge of a buffer is properly handled
|
||||
* @bug 8207760
|
||||
*/
|
||||
@Listeners({jaxp.library.FilePolicy.class})
|
||||
public class JDK8207760 {
|
||||
final String xsl8207760 =
|
||||
"<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">\n" +
|
||||
" <xsl:output omit-xml-declaration=\"yes\" indent=\"no\" />\n" +
|
||||
"\n" +
|
||||
" <xsl:template match=\"node()|@*\">\n" +
|
||||
" <xsl:copy>\n" +
|
||||
" <xsl:apply-templates select=\"node()|@*\" />\n" +
|
||||
" </xsl:copy>\n" +
|
||||
" </xsl:template>\n" +
|
||||
"</xsl:stylesheet>\n";
|
||||
|
||||
final String xsl8207760_2 = "<xsl:stylesheet \n" +
|
||||
" version=\"1.0\" \n" +
|
||||
" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">\n" +
|
||||
"\n" +
|
||||
" <xsl:output method=\"xml\" indent=\"no\" cdata-section-elements=\"source\"/>\n" +
|
||||
"\n" +
|
||||
" <xsl:template match=\"source\">\n" +
|
||||
" <xsl:copy>\n" +
|
||||
" <xsl:apply-templates select=\"node()\" />\n" +
|
||||
" </xsl:copy>\n" +
|
||||
" </xsl:template>\n" +
|
||||
"\n" +
|
||||
"</xsl:stylesheet>";
|
||||
|
||||
final String xsl8207760_3 = "<xsl:stylesheet \n" +
|
||||
" version=\"1.0\" \n" +
|
||||
" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">\n" +
|
||||
"\n" +
|
||||
" <xsl:output method=\"xml\" indent=\"no\" cdata-section-elements=\"source\"/>\n" +
|
||||
"\n" +
|
||||
" <xsl:template match=\"source\">\n" +
|
||||
" <xsl:copy>\n" +
|
||||
" <!-- Copy the attributes -->\n" +
|
||||
" <xsl:apply-templates select=\"@*\"/>\n" +
|
||||
" <!-- Convert the contained nodes (elements and text) into text -->\n" +
|
||||
" <xsl:variable name=\"subElementsText\">\n" +
|
||||
" <xsl:apply-templates select=\"node()\"/>\n" +
|
||||
" </xsl:variable>\n" +
|
||||
" <!-- Output the XML directive and the converted nodes -->\n" +
|
||||
" <xsl:value-of select=\"$subElementsText\"/>\n" +
|
||||
" </xsl:copy>\n" +
|
||||
" </xsl:template>\n" +
|
||||
"\n" +
|
||||
"</xsl:stylesheet>";
|
||||
|
||||
@DataProvider(name = "xsls")
|
||||
public Object[][] getDataBug8207760_cdata() {
|
||||
return new Object[][]{
|
||||
{xsl8207760_2},
|
||||
{xsl8207760_3},
|
||||
};
|
||||
}
|
||||
|
||||
/*
|
||||
* @bug 8207760
|
||||
* Verifies that a surrogate pair at the edge of a buffer is properly handled
|
||||
* when serializing into a Character section.
|
||||
*/
|
||||
@Test
|
||||
public final void testBug8207760() throws Exception {
|
||||
String[] xmls = prepareXML(false);
|
||||
Transformer t = createTransformerFromInputstream(
|
||||
new ByteArrayInputStream(xsl8207760.getBytes(StandardCharsets.UTF_8)));
|
||||
t.setOutputProperty(OutputKeys.ENCODING, StandardCharsets.UTF_8.name());
|
||||
StringWriter sw = new StringWriter();
|
||||
t.transform(new StreamSource(new StringReader(xmls[0])), new StreamResult(sw));
|
||||
Assert.assertEquals(sw.toString().replaceAll(System.lineSeparator(), "\n"), xmls[1]);
|
||||
}
|
||||
|
||||
/*
|
||||
* @bug 8207760
|
||||
* Verifies that a surrogate pair at the edge of a buffer is properly handled
|
||||
* when serializing into a CDATA section.
|
||||
*/
|
||||
@Test(dataProvider = "xsls")
|
||||
public final void testBug8207760_cdata(String xsl) throws Exception {
|
||||
String[] xmls = prepareXML(true);
|
||||
Transformer t = createTransformerFromInputstream(
|
||||
new ByteArrayInputStream(xsl.getBytes(StandardCharsets.UTF_8)));
|
||||
t.setOutputProperty(OutputKeys.ENCODING, StandardCharsets.UTF_8.name());
|
||||
StringWriter sw = new StringWriter();
|
||||
t.transform(new StreamSource(new StringReader(xmls[0])), new StreamResult(sw));
|
||||
Assert.assertEquals(sw.toString().replaceAll(System.lineSeparator(), "\n"), xmls[1]);
|
||||
}
|
||||
|
||||
private String[] prepareXML(boolean cdata) {
|
||||
String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><source>";
|
||||
if (cdata) {
|
||||
xml += "<![CDATA[";
|
||||
}
|
||||
String tail = "abc 123 </source>";
|
||||
if (cdata) {
|
||||
tail = "abc 123 ]]></source>";
|
||||
}
|
||||
String temp = generateString(1023);
|
||||
xml = xml + temp + '\uD83C' + '\uDF42' + tail;
|
||||
//xml = xml + temp + tail;
|
||||
String expected = (!cdata) ? "<source>" + temp + "🍂" + tail
|
||||
: xml;
|
||||
|
||||
return new String[]{xml, expected};
|
||||
}
|
||||
|
||||
static final char[] CHARS = "abcdefghijklmnopqrstuvwxyz \n".toCharArray();
|
||||
StringBuilder sb = new StringBuilder(1024 << 4);
|
||||
Random random = new Random();
|
||||
|
||||
private String generateString(int size) {
|
||||
sb.setLength(0);
|
||||
for (int i = 0; i < size; i++) {
|
||||
char c = CHARS[random.nextInt(CHARS.length)];
|
||||
sb.append(c);
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private Transformer createTransformerFromInputstream(InputStream xslStream)
|
||||
throws TransformerException {
|
||||
return TransformerFactory.newInstance().newTransformer(new StreamSource(xslStream));
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user