8232168: Fix non wide char canonicalization on Windows

Reviewed-by: clanger, alanb, ccheung
2019-10-15 17:08:09 +02:00 · 2019-10-15 17:08:09 +02:00 · 04e885b498
commit 04e885b498
parent cf43427ce9
2 changed files with 64 additions and 298 deletions
--- a/src/java.base/windows/native/libjava/canonicalize_md.c
+++ b/src/java.base/windows/native/libjava/canonicalize_md.c
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -41,39 +41,9 @@
 #undef DEBUG_PATH        /* Define this to debug path code */
 #define isfilesep(c) ((c) == '/' || (c) == '\\')
 #define wisfilesep(c) ((c) == L'/' || (c) == L'\\')
 #define islb(c)      (IsDBCSLeadByte((BYTE)(c)))
 /* Copy bytes to dst, not going past dend; return dst + number of bytes copied,
   or NULL if dend would have been exceeded.  If first != '\0', copy that byte
   before copying bytes from src to send - 1. */
 static char *
 cp(char *dst, char *dend, char first, char *src, char *send)
 {
    char *p = src, *q = dst;
    if (first != '\0') {
        if (q < dend) {
            *q++ = first;
        } else {
            errno = ENAMETOOLONG;
            return NULL;
        }
    }
    if (send - p > dend - q) {
        errno = ENAMETOOLONG;
        return NULL;
    }
    while (p < send) {
        *q++ = *p++;
    }
    return q;
 }
 /* Wide character version of cp */
 static WCHAR*
 wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send)
 {
@ -95,23 +65,8 @@ wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send)
    return q;
 }
 /* Find first instance of '\\' at or following start.  Return the address of
   that byte or the address of the null terminator if '\\' is not found. */
 static char *
 nextsep(char *start)
 {
    char *p = start;
    int c;
    while ((c = *p) && (c != '\\')) {
        p += ((islb(c) && p[1]) ? 2 : 1);
    }
    return p;
 }
 /* Wide character version of nextsep */
 static WCHAR *
 wnextsep(WCHAR *start)
 {
@ -123,21 +78,6 @@ wnextsep(WCHAR *start)
 }
 /* Tell whether the given string contains any wildcard characters */
 static int
 wild(char *start)
 {
    char *p = start;
    int c;
    while (c = *p) {
        if ((c == '*') || (c == '?')) return 1;
        p += ((islb(c) && p[1]) ? 2 : 1);
    }
    return 0;
 }
 /* Wide character version of wild */
 static int
 wwild(WCHAR *start)
 {
@ -157,25 +97,6 @@ wwild(WCHAR *start)
   Prohibited canonical paths: c:\..\x  c:\x.\d c:\...
 */
 static int
 dots(char *start)
 {
    char *p = start;
    while (*p) {
        if ((p = strchr(p, '.')) == NULL) // find next occurrence of '.'
            return 0; // no more dots
        p++; // next char
        while ((*p) == '.') // go to the end of dots
            p++;
        if (*p && (*p != '\\')) // path element does not end with a dot
            p++; // go to the next char
        else
            return 1; // path element does end with a dot - prohibited
    }
    return 0; // no prohibited combinations of dots found
 }
 /* Wide character version of dots */
 static int
 wdots(WCHAR *start)
 {
    WCHAR *p = start;
@ -203,7 +124,6 @@ wdots(WCHAR *start)
   successfully after copying the rest of the original path to the result path.
   Other I/O errors cause an error return.
 */
 int
 lastErrorReportable()
 {
@ -225,222 +145,10 @@ lastErrorReportable()
    return 1;
 }
 int wcanonicalize(WCHAR *orig_path, WCHAR *result, int size);
 /* Convert a pathname to canonical form.  The input orig_path is assumed to
   have been converted to native form already, via JVM_NativePath().  This is
   necessary because _fullpath() rejects duplicate separator characters on
   Win95, though it accepts them on NT. */
 int
 canonicalize(char *orig_path, char *result, int size)
 {
    WIN32_FIND_DATA fd;
    HANDLE h;
    char path[1024];    /* Working copy of path */
    char *src, *dst, *dend;
    wchar_t *worig_path, *wresult;
    size_t converted_chars = 0;
    /* handle long path with length >= MAX_PATH */
    if (strlen(orig_path) >= MAX_PATH) {
        if ((worig_path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
            return -1;
        if (mbstowcs_s(&converted_chars, worig_path, (size_t)size, orig_path, (size_t)(size - 1)) != 0) {
            free(worig_path);
            return -1;
        }
        if ((wresult = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
            return -1;
        if (wcanonicalize(worig_path, wresult, size) != 0) {
            free(worig_path);
            free(wresult);
            return -1;
        }
        if (wcstombs_s(&converted_chars, result, (size_t)size, wresult, (size_t)(size - 1)) != 0) {
            free(worig_path);
            free(wresult);
            return -1;
        }
        free(worig_path);
        free(wresult);
        return 0;
    }
    /* Reject paths that contain wildcards */
    if (wild(orig_path)) {
        errno = EINVAL;
        return -1;
    }
    /* Collapse instances of "foo\.." and ensure absoluteness.  Note that
      contrary to the documentation, the _fullpath procedure does not require
      the drive to be available.  It also does not reliably change all
      occurrences of '/' to '\\' on Win95, so now JVM_NativePath does that. */
    if (!_fullpath(path, orig_path, sizeof(path))) {
        return -1;
    }
    /* Correction for Win95: _fullpath may leave a trailing "\\"
      on a UNC pathname */
    if ((path[0] == '\\') && (path[1] == '\\')) {
        char *p = path + strlen(path);
        if ((p[-1] == '\\') && !islb(p[-2])) {
            p[-1] = '\0';
        }
    }
    if (dots(path)) /* Check for prohibited combinations of dots */
        return -1;
    src = path;            /* Start scanning here */
    dst = result;        /* Place results here */
    dend = dst + size;        /* Don't go to or past here */
    /* Copy prefix, assuming path is absolute */
    if (isalpha(src[0]) && (src[1] == ':') && (src[2] == '\\')) {
        /* Drive specifier */
        *src = toupper(*src);    /* Canonicalize drive letter */
        if (!(dst = cp(dst, dend, '\0', src, src + 2))) {
            return -1;
        }
        src += 2;
    } else if ((src[0] == '\\') && (src[1] == '\\')) {
        /* UNC pathname */
        char *p;
        p = nextsep(src + 2);    /* Skip past host name */
        if (!*p) {
            /* A UNC pathname must begin with "\\\\host\\share",
            so reject this path as invalid if there is no share name */
            errno = EINVAL;
            return -1;
        }
        p = nextsep(p + 1);    /* Skip past share name */
        if (!(dst = cp(dst, dend, '\0', src, p))) {
            return -1;
        }
        src = p;
    } else {
        /* Invalid path */
        errno = EINVAL;
        return -1;
    }
    /* Windows 95/98/Me bug - FindFirstFile fails on network mounted drives */
    /* for root pathes like "E:\" . If the path has this form, we should  */
    /* simply return it, it is already canonicalized. */
    if (strlen(path) == 3 && path[1] == ':' && path[2] == '\\') {
        /* At this point we have already copied the drive specifier ("z:")*/
        /* so we need to copy "\" and the null character. */
        result[2] = '\\';
        result[3] = '\0';
        return 0;
    }
    /* At this point we have copied either a drive specifier ("z:") or a UNC
    prefix ("\\\\host\\share") to the result buffer, and src points to the
    first byte of the remainder of the path.  We now scan through the rest
    of the path, looking up each prefix in order to find the true name of
    the last element of each prefix, thereby computing the full true name of
    the original path. */
    while (*src) {
        char *p = nextsep(src + 1);    /* Find next separator */
        char c = *p;
        assert(*src == '\\');        /* Invariant */
        *p = '\0';            /* Temporarily clear separator */
        h = FindFirstFile(path, &fd);    /* Look up prefix */
        *p = c;                /* Restore separator */
        if (h != INVALID_HANDLE_VALUE) {
            /* Lookup succeeded; append true name to result and continue */
            FindClose(h);
            if (!(dst = cp(dst, dend, '\\',
                fd.cFileName,
                fd.cFileName + strlen(fd.cFileName)))) {
                return -1;
            }
            src = p;
            continue;
        } else {
            if (!lastErrorReportable()) {
                if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
                    return -1;
                }
                break;
            } else {
                return -1;
            }
        }
    }
    if (dst >= dend) {
        errno = ENAMETOOLONG;
        return -1;
    }
    *dst = '\0';
    return 0;
 }
 /* Convert a pathname to canonical form.  The input prefix is assumed
   to be in canonical form already, and the trailing filename must not
   contain any wildcard, dot/double dot, or other "tricky" characters
   that are rejected by the canonicalize() routine above.  This
   routine is present to allow the canonicalization prefix cache to be
   used while still returning canonical names with the correct
   capitalization. */
 int
 canonicalizeWithPrefix(char* canonicalPrefix, char* pathWithCanonicalPrefix, char *result, int size)
 {
    WIN32_FIND_DATA fd;
    HANDLE h;
    char *src, *dst, *dend;
    src = pathWithCanonicalPrefix;
    dst = result;        /* Place results here */
    dend = dst + size;   /* Don't go to or past here */
    h = FindFirstFile(pathWithCanonicalPrefix, &fd);    /* Look up file */
    if (h != INVALID_HANDLE_VALUE) {
        /* Lookup succeeded; concatenate true name to prefix */
        FindClose(h);
        if (!(dst = cp(dst, dend, '\0',
                       canonicalPrefix,
                       canonicalPrefix + strlen(canonicalPrefix)))) {
            return -1;
        }
        if (!(dst = cp(dst, dend, '\\',
                       fd.cFileName,
                       fd.cFileName + strlen(fd.cFileName)))) {
            return -1;
        }
    } else {
        if (!lastErrorReportable()) {
            if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
                return -1;
            }
        } else {
            return -1;
        }
    }
    if (dst >= dend) {
        errno = ENAMETOOLONG;
        return -1;
    }
    *dst = '\0';
    return 0;
 }
 /* Wide character version of canonicalize. Size is a wide-character size. */
 int
 wcanonicalize(WCHAR *orig_path, WCHAR *result, int size)
 {
@ -559,9 +267,13 @@ wcanonicalize(WCHAR *orig_path, WCHAR *result, int size)
    return -1;
 }
-
+/* Convert a pathname to canonical form.  The input prefix is assumed
-/* Wide character version of canonicalizeWithPrefix. */
+   to be in canonical form already, and the trailing filename must not
-
+   contain any wildcard, dot/double dot, or other "tricky" characters
   that are rejected by the canonicalize() routine above.  This
   routine is present to allow the canonicalization prefix cache to be
   used while still returning canonical names with the correct
   capitalization. */
 int
 wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size)
 {
@ -613,6 +325,46 @@ wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix,
    return 0;
 }
 /* Non-Wide character version of canonicalize.
   Converts to whchar and delegates to wcanonicalize. */
 int
 canonicalize(char* orig_path, char* result, int size) {
    wchar_t* wpath = NULL;
    wchar_t* wresult = NULL;
    size_t conv;
    size_t path_len = strlen(orig_path);
    int ret = -1;
    if ((wpath = (wchar_t*) malloc(sizeof(wchar_t) * (path_len + 1))) == NULL) {
        goto finish;
    }
    if (mbstowcs_s(&conv, wpath, path_len + 1, orig_path, path_len) != 0) {
        goto finish;
    }
    if ((wresult = (wchar_t*) malloc(sizeof(wchar_t) * size)) == NULL) {
        goto finish;
    }
    if (wcanonicalize(wpath, wresult, size) != 0) {
        goto finish;
    }
    if (wcstombs_s(&conv, result, (size_t) size, wresult, (size_t) (size - 1)) != 0) {
        goto finish;
    }
    // Change return value to success.
    ret = 0;
 finish:
    free(wresult);
    free(wpath);
    return ret;
 }
 /* The appropriate location of getPrefixed() should be io_util_md.c, but
   java.lang.instrument package has hardwired canonicalize_md.c into their
--- a/test/hotspot/jtreg/runtime/LoadClass/LongBCP.java
+++ b/test/hotspot/jtreg/runtime/LoadClass/LongBCP.java
@ -102,8 +102,6 @@ public class LongBCP {
              .shouldHaveExitValue(0);
        // relative path tests
        // We currently cannot handle relative path specified in the
        // -Xbootclasspath/a on windows.
        //
        // relative path length within the file system limit
        int fn_max_length = 255;
@ -124,6 +122,22 @@ public class LongBCP {
        pb = ProcessTools.createJavaProcessBuilder(
            bootCP, "Hello");
        output = new OutputAnalyzer(pb.start());
        output.shouldContain("Hello World")
              .shouldHaveExitValue(0);
        // Test a relative path for a jar file < MAX_PATH, but where the
        // absolute path is > MAX_PATH.
        Path jarDir = Paths.get(".");
        for (int i = 0; i < 21; ++i) {
            jarDir = jarDir.resolve("0123456789");
        }
        Files.createDirectories(jarDir);
        Path jarPath = jarDir.resolve("hello.jar");
        Files.copy(Paths.get(helloJar), jarPath);
        bootCP = "-Xbootclasspath/a:" + jarPath.toString();
        pb = ProcessTools.createJavaProcessBuilder(bootCP, "Hello");
        output = new OutputAnalyzer(pb.start());
        output.shouldContain("Hello World")
              .shouldHaveExitValue(0);