8316734: URLEncoder should specify that replacement bytes will be used in case of coding error

Reviewed-by: dfuchs, alanb
This commit is contained in:
Darragh Clarke 2023-11-29 12:59:19 +00:00
parent 159465324f
commit 48960df7bc
2 changed files with 17 additions and 13 deletions
src/java.base/share/classes/java/net

@@ -98,6 +98,8 @@ public class URLDecoder {
  * default charset. Instead, use the decode(String,String) method
  * to specify the encoding.
  * @return the newly decoded {@code String}
+ * @throws IllegalArgumentException if the implementation encounters malformed
+ * escape sequences
  */
  @Deprecated
  public static String decode(String s) {
@@ -113,9 +115,6 @@ public class URLDecoder {
  * except that it will {@linkplain Charset#forName look up the charset}
  * using the given encoding name.
  *
- * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
- * when illegal strings are encountered.
- *
  * @param s the {@code String} to decode
  * @param enc The name of a supported
  * <a href="../lang/package-summary.html#charenc">character
@@ -124,6 +123,8 @@ public class URLDecoder {
  * @throws UnsupportedEncodingException
  * If character encoding needs to be consulted, but
  * named character encoding is not supported
+ * @throws IllegalArgumentException if the implementation encounters malformed
+ * escape sequences
  * @see URLEncoder#encode(java.lang.String, java.lang.String)
  * @since 1.4
  */
@@ -144,8 +145,10 @@ public class URLDecoder {
  * Decodes an {@code application/x-www-form-urlencoded} string using
  * a specific {@linkplain Charset Charset}.
  * The supplied charset is used to determine
- * what characters are represented by any consecutive sequences of the
- * form "<i>{@code %xy}</i>".
+ * what characters are represented by any consecutive escape sequences of
+ * the form "<i>{@code %xy}</i>". Erroneous bytes are replaced with the
+ * supplied {@code Charset}'s {@linkplain java.nio.charset.CharsetDecoder##cae
+ * replacement value}.
  * <p>
  * <em><strong>Note:</strong> The <a href=
  * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
@@ -153,15 +156,12 @@ public class URLDecoder {
  * UTF-8 should be used. Not doing so may introduce
  * incompatibilities.</em>
  *
- * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
- * when illegal strings are encountered.
- *
  * @param s the {@code String} to decode
  * @param charset the given charset
  * @return the newly decoded {@code String}
  * @throws NullPointerException if {@code s} or {@code charset} is {@code null}
- * @throws IllegalArgumentException if the implementation encounters illegal
- * characters
+ * @throws IllegalArgumentException if the implementation encounters malformed
+ * escape sequences
  *
  * @spec https://www.w3.org/TR/html4 HTML 4.01 Specification
  * @see URLEncoder#encode(java.lang.String, Charset)

@@ -200,11 +200,15 @@ public class URLEncoder {
  * This method uses the supplied charset to obtain the bytes for unsafe
  * characters.
  * <p>
- * <em><strong>Note:</strong> The <a href=
+ * If the input string is malformed, or if the input cannot be mapped
+ * to a valid byte sequence in the given {@code Charset}, then the
+ * erroneous input will be replaced with the {@code Charset}'s
+ * {@linkplain CharsetEncoder##cae replacement values}.
+ *
+ * @apiNote The <a href=
  * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
  * World Wide Web Consortium Recommendation</a> states that
- * UTF-8 should be used. Not doing so may introduce incompatibilities.</em>
- *
+ * UTF-8 should be used. Not doing so may introduce incompatibilities.
  * @param s {@code String} to be translated.
  * @param charset the given charset
  * @return the translated {@code String}.