8232168: Fix non wide char canonicalization on Windows

Reviewed-by: clanger, alanb, ccheung
This commit is contained in:
Ralf Schmelter 2019-10-15 17:08:09 +02:00
parent cf43427ce9
commit 04e885b498
2 changed files with 64 additions and 298 deletions

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -41,39 +41,9 @@
#undef DEBUG_PATH /* Define this to debug path code */ #undef DEBUG_PATH /* Define this to debug path code */
#define isfilesep(c) ((c) == '/' || (c) == '\\')
#define wisfilesep(c) ((c) == L'/' || (c) == L'\\')
#define islb(c) (IsDBCSLeadByte((BYTE)(c)))
/* Copy bytes to dst, not going past dend; return dst + number of bytes copied, /* Copy bytes to dst, not going past dend; return dst + number of bytes copied,
or NULL if dend would have been exceeded. If first != '\0', copy that byte or NULL if dend would have been exceeded. If first != '\0', copy that byte
before copying bytes from src to send - 1. */ before copying bytes from src to send - 1. */
static char *
cp(char *dst, char *dend, char first, char *src, char *send)
{
char *p = src, *q = dst;
if (first != '\0') {
if (q < dend) {
*q++ = first;
} else {
errno = ENAMETOOLONG;
return NULL;
}
}
if (send - p > dend - q) {
errno = ENAMETOOLONG;
return NULL;
}
while (p < send) {
*q++ = *p++;
}
return q;
}
/* Wide character version of cp */
static WCHAR* static WCHAR*
wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send) wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send)
{ {
@ -95,23 +65,8 @@ wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send)
return q; return q;
} }
/* Find first instance of '\\' at or following start. Return the address of /* Find first instance of '\\' at or following start. Return the address of
that byte or the address of the null terminator if '\\' is not found. */ that byte or the address of the null terminator if '\\' is not found. */
static char *
nextsep(char *start)
{
char *p = start;
int c;
while ((c = *p) && (c != '\\')) {
p += ((islb(c) && p[1]) ? 2 : 1);
}
return p;
}
/* Wide character version of nextsep */
static WCHAR * static WCHAR *
wnextsep(WCHAR *start) wnextsep(WCHAR *start)
{ {
@ -123,21 +78,6 @@ wnextsep(WCHAR *start)
} }
/* Tell whether the given string contains any wildcard characters */ /* Tell whether the given string contains any wildcard characters */
static int
wild(char *start)
{
char *p = start;
int c;
while (c = *p) {
if ((c == '*') || (c == '?')) return 1;
p += ((islb(c) && p[1]) ? 2 : 1);
}
return 0;
}
/* Wide character version of wild */
static int static int
wwild(WCHAR *start) wwild(WCHAR *start)
{ {
@ -157,25 +97,6 @@ wwild(WCHAR *start)
Prohibited canonical paths: c:\..\x c:\x.\d c:\... Prohibited canonical paths: c:\..\x c:\x.\d c:\...
*/ */
static int static int
dots(char *start)
{
char *p = start;
while (*p) {
if ((p = strchr(p, '.')) == NULL) // find next occurrence of '.'
return 0; // no more dots
p++; // next char
while ((*p) == '.') // go to the end of dots
p++;
if (*p && (*p != '\\')) // path element does not end with a dot
p++; // go to the next char
else
return 1; // path element does end with a dot - prohibited
}
return 0; // no prohibited combinations of dots found
}
/* Wide character version of dots */
static int
wdots(WCHAR *start) wdots(WCHAR *start)
{ {
WCHAR *p = start; WCHAR *p = start;
@ -203,7 +124,6 @@ wdots(WCHAR *start)
successfully after copying the rest of the original path to the result path. successfully after copying the rest of the original path to the result path.
Other I/O errors cause an error return. Other I/O errors cause an error return.
*/ */
int int
lastErrorReportable() lastErrorReportable()
{ {
@ -225,222 +145,10 @@ lastErrorReportable()
return 1; return 1;
} }
int wcanonicalize(WCHAR *orig_path, WCHAR *result, int size);
/* Convert a pathname to canonical form. The input orig_path is assumed to /* Convert a pathname to canonical form. The input orig_path is assumed to
have been converted to native form already, via JVM_NativePath(). This is have been converted to native form already, via JVM_NativePath(). This is
necessary because _fullpath() rejects duplicate separator characters on necessary because _fullpath() rejects duplicate separator characters on
Win95, though it accepts them on NT. */ Win95, though it accepts them on NT. */
int
canonicalize(char *orig_path, char *result, int size)
{
WIN32_FIND_DATA fd;
HANDLE h;
char path[1024]; /* Working copy of path */
char *src, *dst, *dend;
wchar_t *worig_path, *wresult;
size_t converted_chars = 0;
/* handle long path with length >= MAX_PATH */
if (strlen(orig_path) >= MAX_PATH) {
if ((worig_path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
return -1;
if (mbstowcs_s(&converted_chars, worig_path, (size_t)size, orig_path, (size_t)(size - 1)) != 0) {
free(worig_path);
return -1;
}
if ((wresult = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
return -1;
if (wcanonicalize(worig_path, wresult, size) != 0) {
free(worig_path);
free(wresult);
return -1;
}
if (wcstombs_s(&converted_chars, result, (size_t)size, wresult, (size_t)(size - 1)) != 0) {
free(worig_path);
free(wresult);
return -1;
}
free(worig_path);
free(wresult);
return 0;
}
/* Reject paths that contain wildcards */
if (wild(orig_path)) {
errno = EINVAL;
return -1;
}
/* Collapse instances of "foo\.." and ensure absoluteness. Note that
contrary to the documentation, the _fullpath procedure does not require
the drive to be available. It also does not reliably change all
occurrences of '/' to '\\' on Win95, so now JVM_NativePath does that. */
if (!_fullpath(path, orig_path, sizeof(path))) {
return -1;
}
/* Correction for Win95: _fullpath may leave a trailing "\\"
on a UNC pathname */
if ((path[0] == '\\') && (path[1] == '\\')) {
char *p = path + strlen(path);
if ((p[-1] == '\\') && !islb(p[-2])) {
p[-1] = '\0';
}
}
if (dots(path)) /* Check for prohibited combinations of dots */
return -1;
src = path; /* Start scanning here */
dst = result; /* Place results here */
dend = dst + size; /* Don't go to or past here */
/* Copy prefix, assuming path is absolute */
if (isalpha(src[0]) && (src[1] == ':') && (src[2] == '\\')) {
/* Drive specifier */
*src = toupper(*src); /* Canonicalize drive letter */
if (!(dst = cp(dst, dend, '\0', src, src + 2))) {
return -1;
}
src += 2;
} else if ((src[0] == '\\') && (src[1] == '\\')) {
/* UNC pathname */
char *p;
p = nextsep(src + 2); /* Skip past host name */
if (!*p) {
/* A UNC pathname must begin with "\\\\host\\share",
so reject this path as invalid if there is no share name */
errno = EINVAL;
return -1;
}
p = nextsep(p + 1); /* Skip past share name */
if (!(dst = cp(dst, dend, '\0', src, p))) {
return -1;
}
src = p;
} else {
/* Invalid path */
errno = EINVAL;
return -1;
}
/* Windows 95/98/Me bug - FindFirstFile fails on network mounted drives */
/* for root pathes like "E:\" . If the path has this form, we should */
/* simply return it, it is already canonicalized. */
if (strlen(path) == 3 && path[1] == ':' && path[2] == '\\') {
/* At this point we have already copied the drive specifier ("z:")*/
/* so we need to copy "\" and the null character. */
result[2] = '\\';
result[3] = '\0';
return 0;
}
/* At this point we have copied either a drive specifier ("z:") or a UNC
prefix ("\\\\host\\share") to the result buffer, and src points to the
first byte of the remainder of the path. We now scan through the rest
of the path, looking up each prefix in order to find the true name of
the last element of each prefix, thereby computing the full true name of
the original path. */
while (*src) {
char *p = nextsep(src + 1); /* Find next separator */
char c = *p;
assert(*src == '\\'); /* Invariant */
*p = '\0'; /* Temporarily clear separator */
h = FindFirstFile(path, &fd); /* Look up prefix */
*p = c; /* Restore separator */
if (h != INVALID_HANDLE_VALUE) {
/* Lookup succeeded; append true name to result and continue */
FindClose(h);
if (!(dst = cp(dst, dend, '\\',
fd.cFileName,
fd.cFileName + strlen(fd.cFileName)))) {
return -1;
}
src = p;
continue;
} else {
if (!lastErrorReportable()) {
if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
return -1;
}
break;
} else {
return -1;
}
}
}
if (dst >= dend) {
errno = ENAMETOOLONG;
return -1;
}
*dst = '\0';
return 0;
}
/* Convert a pathname to canonical form. The input prefix is assumed
to be in canonical form already, and the trailing filename must not
contain any wildcard, dot/double dot, or other "tricky" characters
that are rejected by the canonicalize() routine above. This
routine is present to allow the canonicalization prefix cache to be
used while still returning canonical names with the correct
capitalization. */
int
canonicalizeWithPrefix(char* canonicalPrefix, char* pathWithCanonicalPrefix, char *result, int size)
{
WIN32_FIND_DATA fd;
HANDLE h;
char *src, *dst, *dend;
src = pathWithCanonicalPrefix;
dst = result; /* Place results here */
dend = dst + size; /* Don't go to or past here */
h = FindFirstFile(pathWithCanonicalPrefix, &fd); /* Look up file */
if (h != INVALID_HANDLE_VALUE) {
/* Lookup succeeded; concatenate true name to prefix */
FindClose(h);
if (!(dst = cp(dst, dend, '\0',
canonicalPrefix,
canonicalPrefix + strlen(canonicalPrefix)))) {
return -1;
}
if (!(dst = cp(dst, dend, '\\',
fd.cFileName,
fd.cFileName + strlen(fd.cFileName)))) {
return -1;
}
} else {
if (!lastErrorReportable()) {
if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
return -1;
}
} else {
return -1;
}
}
if (dst >= dend) {
errno = ENAMETOOLONG;
return -1;
}
*dst = '\0';
return 0;
}
/* Wide character version of canonicalize. Size is a wide-character size. */
int int
wcanonicalize(WCHAR *orig_path, WCHAR *result, int size) wcanonicalize(WCHAR *orig_path, WCHAR *result, int size)
{ {
@ -559,9 +267,13 @@ wcanonicalize(WCHAR *orig_path, WCHAR *result, int size)
return -1; return -1;
} }
/* Convert a pathname to canonical form. The input prefix is assumed
/* Wide character version of canonicalizeWithPrefix. */ to be in canonical form already, and the trailing filename must not
contain any wildcard, dot/double dot, or other "tricky" characters
that are rejected by the canonicalize() routine above. This
routine is present to allow the canonicalization prefix cache to be
used while still returning canonical names with the correct
capitalization. */
int int
wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size) wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size)
{ {
@ -613,6 +325,46 @@ wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix,
return 0; return 0;
} }
/* Non-Wide character version of canonicalize.
Converts to whchar and delegates to wcanonicalize. */
int
canonicalize(char* orig_path, char* result, int size) {
wchar_t* wpath = NULL;
wchar_t* wresult = NULL;
size_t conv;
size_t path_len = strlen(orig_path);
int ret = -1;
if ((wpath = (wchar_t*) malloc(sizeof(wchar_t) * (path_len + 1))) == NULL) {
goto finish;
}
if (mbstowcs_s(&conv, wpath, path_len + 1, orig_path, path_len) != 0) {
goto finish;
}
if ((wresult = (wchar_t*) malloc(sizeof(wchar_t) * size)) == NULL) {
goto finish;
}
if (wcanonicalize(wpath, wresult, size) != 0) {
goto finish;
}
if (wcstombs_s(&conv, result, (size_t) size, wresult, (size_t) (size - 1)) != 0) {
goto finish;
}
// Change return value to success.
ret = 0;
finish:
free(wresult);
free(wpath);
return ret;
}
/* The appropriate location of getPrefixed() should be io_util_md.c, but /* The appropriate location of getPrefixed() should be io_util_md.c, but
java.lang.instrument package has hardwired canonicalize_md.c into their java.lang.instrument package has hardwired canonicalize_md.c into their

View File

@ -102,8 +102,6 @@ public class LongBCP {
.shouldHaveExitValue(0); .shouldHaveExitValue(0);
// relative path tests // relative path tests
// We currently cannot handle relative path specified in the
// -Xbootclasspath/a on windows.
// //
// relative path length within the file system limit // relative path length within the file system limit
int fn_max_length = 255; int fn_max_length = 255;
@ -124,6 +122,22 @@ public class LongBCP {
pb = ProcessTools.createJavaProcessBuilder( pb = ProcessTools.createJavaProcessBuilder(
bootCP, "Hello"); bootCP, "Hello");
output = new OutputAnalyzer(pb.start());
output.shouldContain("Hello World")
.shouldHaveExitValue(0);
// Test a relative path for a jar file < MAX_PATH, but where the
// absolute path is > MAX_PATH.
Path jarDir = Paths.get(".");
for (int i = 0; i < 21; ++i) {
jarDir = jarDir.resolve("0123456789");
}
Files.createDirectories(jarDir);
Path jarPath = jarDir.resolve("hello.jar");
Files.copy(Paths.get(helloJar), jarPath);
bootCP = "-Xbootclasspath/a:" + jarPath.toString();
pb = ProcessTools.createJavaProcessBuilder(bootCP, "Hello");
output = new OutputAnalyzer(pb.start()); output = new OutputAnalyzer(pb.start());
output.shouldContain("Hello World") output.shouldContain("Hello World")
.shouldHaveExitValue(0); .shouldHaveExitValue(0);