8316734: URLEncoder should specify that replacement bytes will be used in case of coding error

Reviewed-by: dfuchs, alanb
2023-11-29 12:59:19 +00:00 · 2023-11-29 12:59:19 +00:00 · 48960df7bc
commit 48960df7bc
parent 159465324f
2 changed files with 17 additions and 13 deletions
--- a/src/java.base/share/classes/java/net/URLDecoder.java
+++ b/src/java.base/share/classes/java/net/URLDecoder.java
@@ -98,6 +98,8 @@ public class URLDecoder {
     *          default charset. Instead, use the decode(String,String) method
     *          to specify the encoding.
     * @return the newly decoded {@code String}
+     * @throws IllegalArgumentException if the implementation encounters malformed
+     * escape sequences
     */
    @Deprecated
    public static String decode(String s) {
@@ -113,9 +115,6 @@ public class URLDecoder {
     * except that it will {@linkplain Charset#forName look up the charset}
     * using the given encoding name.
     *
-     * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
-     * when illegal strings are encountered.
-     *
     * @param s the {@code String} to decode
     * @param enc   The name of a supported
     *    <a href="../lang/package-summary.html#charenc">character
@@ -124,6 +123,8 @@ public class URLDecoder {
     * @throws UnsupportedEncodingException
     *             If character encoding needs to be consulted, but
     *             named character encoding is not supported
+     * @throws IllegalArgumentException if the implementation encounters malformed
+     * escape sequences
     * @see URLEncoder#encode(java.lang.String, java.lang.String)
     * @since 1.4
     */
@@ -144,8 +145,10 @@ public class URLDecoder {
     * Decodes an {@code application/x-www-form-urlencoded} string using
     * a specific {@linkplain Charset Charset}.
     * The supplied charset is used to determine
-     * what characters are represented by any consecutive sequences of the
-     * form "<i>{@code %xy}</i>".
+     * what characters are represented by any consecutive escape sequences of
+     * the form "<i>{@code %xy}</i>". Erroneous bytes are replaced with the
+     * supplied {@code Charset}'s {@linkplain java.nio.charset.CharsetDecoder##cae
+     * replacement value}.
     * <p>
     * <em><strong>Note:</strong> The <a href=
     * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
@@ -153,15 +156,12 @@ public class URLDecoder {
     * UTF-8 should be used. Not doing so may introduce
     * incompatibilities.</em>
     *
-     * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
-     * when illegal strings are encountered.
-     *
     * @param s the {@code String} to decode
     * @param charset the given charset
     * @return the newly decoded {@code String}
     * @throws NullPointerException if {@code s} or {@code charset} is {@code null}
-     * @throws IllegalArgumentException if the implementation encounters illegal
-     * characters
+     * @throws IllegalArgumentException if the implementation encounters malformed
+     * escape sequences
     *
     * @spec https://www.w3.org/TR/html4 HTML 4.01 Specification
     * @see URLEncoder#encode(java.lang.String, Charset)
--- a/src/java.base/share/classes/java/net/URLEncoder.java
+++ b/src/java.base/share/classes/java/net/URLEncoder.java
@@ -200,11 +200,15 @@ public class URLEncoder {
     * This method uses the supplied charset to obtain the bytes for unsafe
     * characters.
     * <p>
-     * <em><strong>Note:</strong> The <a href=
+     * If the input string is malformed, or if the input cannot be mapped
+     * to a valid byte sequence in the given {@code Charset}, then the
+     * erroneous input will be replaced with the {@code Charset}'s
+     * {@linkplain CharsetEncoder##cae replacement values}.
+     *
+     * @apiNote The <a href=
     * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
     * World Wide Web Consortium Recommendation</a> states that
-     * UTF-8 should be used. Not doing so may introduce incompatibilities.</em>
-     *
+     * UTF-8 should be used. Not doing so may introduce incompatibilities.
     * @param   s   {@code String} to be translated.
     * @param charset the given charset
     * @return  the translated {@code String}.