From f5388e93adfc849fb02f900c5f6d1dbd6eb7e68a Mon Sep 17 00:00:00 2001 From: Naoto Sato Date: Mon, 3 Jul 2017 12:54:24 -0700 Subject: [PATCH] 8160199: Language's script should be reflected in user.script on Mac OS X Reviewed-by: bchristi, sherman --- .../macosx/native/libjava/java_props_macosx.c | 136 ++++++++++++------ .../macosx/native/libjava/java_props_macosx.h | 3 +- .../unix/native/libjava/locale_str.h | 12 +- 3 files changed, 102 insertions(+), 49 deletions(-) diff --git a/jdk/src/java.base/macosx/native/libjava/java_props_macosx.c b/jdk/src/java.base/macosx/native/libjava/java_props_macosx.c index b11a4d00143..4c7ae30c760 100644 --- a/jdk/src/java.base/macosx/native/libjava/java_props_macosx.c +++ b/jdk/src/java.base/macosx/native/libjava/java_props_macosx.c @@ -47,6 +47,7 @@ char *getPosixLocale(int cat) { #define LOCALEIDLENGTH 128 char *getMacOSXLocale(int cat) { const char* retVal = NULL; + char localeString[LOCALEIDLENGTH]; switch (cat) { case LC_MESSAGES: @@ -74,73 +75,114 @@ char *getMacOSXLocale(int cat) { } CFRelease(languages); - retVal = languageString; + // Explicitly supply region, if there is none + char *hyphenPos = strchr(languageString, '-'); + int langStrLen = strlen(languageString); - // Special case for Portuguese in Brazil: - // The language code needs the "_BR" region code (to distinguish it - // from Portuguese in Portugal), but this is missing when using the - // "Portuguese (Brazil)" language. - // If language is "pt" and the current locale is pt_BR, return pt_BR. - char localeString[LOCALEIDLENGTH]; - if (strcmp(retVal, "pt") == 0 && - CFStringGetCString(CFLocaleGetIdentifier(CFLocaleCopyCurrent()), - localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding()) && - strcmp(localeString, "pt_BR") == 0) { - retVal = localeString; + if (hyphenPos == NULL || // languageString contains ISO639 only, e.g., "en" + languageString + langStrLen - hyphenPos == 5) { // ISO639-ScriptCode, e.g., "en-Latn" + CFStringGetCString(CFLocaleGetIdentifier(CFLocaleCopyCurrent()), + localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding()); + char *underscorePos = strrchr(localeString, '_'); + char *region = NULL; + + if (underscorePos != NULL) { + region = underscorePos + 1; + } + + if (region != NULL) { + strcat(languageString, "-"); + strcat(languageString, region); + } } + + retVal = languageString; } break; + default: { - char localeString[LOCALEIDLENGTH]; if (!CFStringGetCString(CFLocaleGetIdentifier(CFLocaleCopyCurrent()), localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding())) { return NULL; } + retVal = localeString; } break; } if (retVal != NULL) { - // Language IDs use the language designators and (optional) region - // and script designators of BCP 47. So possible formats are: - // - // "en" (language designator only) - // "haw" (3-letter lanuage designator) - // "en-GB" (language with alpha-2 region designator) - // "es-419" (language with 3-digit UN M.49 area code) - // "zh-Hans" (language with ISO 15924 script designator) - // "zh-Hans-US" (language with ISO 15924 script designator and region) - // "zh-Hans-419" (language with ISO 15924 script designator and UN M.49) - // - // In the case of region designators (alpha-2 and/or UN M.49), we convert - // to our locale string format by changing '-' to '_'. That is, if - // the '-' is followed by fewer than 4 chars. - char* scriptOrRegion = strchr(retVal, '-'); - if (scriptOrRegion != NULL) { - int length = strlen(scriptOrRegion); - if (length > 5) { - // Region and script both exist. Honor the script for now - scriptOrRegion[5] = '\0'; - } else if (length < 5) { - *scriptOrRegion = '_'; + return strdup(convertToPOSIXLocale(retVal)); + } - assert((length == 3 && - // '-' followed by a 2 character region designator - isalpha(scriptOrRegion[1]) && - isalpha(scriptOrRegion[2])) || - (length == 4 && - // '-' followed by a 3-digit UN M.49 area code - isdigit(scriptOrRegion[1]) && - isdigit(scriptOrRegion[2]) && - isdigit(scriptOrRegion[3]))); - } + return NULL; +} + +/* Language IDs use the language designators and (optional) region + * and script designators of BCP 47. So possible formats are: + * + * "en" (language designator only) + * "haw" (3-letter lanuage designator) + * "en-GB" (language with alpha-2 region designator) + * "es-419" (language with 3-digit UN M.49 area code) + * "zh-Hans" (language with ISO 15924 script designator) + * "zh-Hans-US" (language with ISO 15924 script designator and region) + * "zh-Hans-419" (language with ISO 15924 script designator and UN M.49) + * + * convert these tags into POSIX conforming locale string, i.e., + * lang{_region}{@script}. e.g., for "zh-Hans-US" into "zh_US@Hans" + */ +const char * convertToPOSIXLocale(const char* src) { + char* scriptRegion = strchr(src, '-'); + if (scriptRegion != NULL) { + int length = strlen(scriptRegion); + char* region = strchr(scriptRegion + 1, '-'); + char* atMark = NULL; + + if (region == NULL) { + // CFLocaleGetIdentifier() returns '_' before region + region = strchr(scriptRegion + 1, '_'); } - return strdup(retVal); + *scriptRegion = '_'; + if (length > 5) { + // Region and script both exist. + char tmpScript[4]; + int regionLength = length - 6; + atMark = scriptRegion + 1 + regionLength; + memcpy(tmpScript, scriptRegion + 1, 4); + memmove(scriptRegion + 1, region + 1, regionLength); + memcpy(atMark + 1, tmpScript, 4); + } else if (length == 5) { + // script only + atMark = scriptRegion; + } + + if (atMark != NULL) { + *atMark = '@'; + + // assert script code + assert(isalpha(atMark[1]) && + isalpha(atMark[2]) && + isalpha(atMark[3]) && + isalpha(atMark[4])); + } + + assert(((length == 3 || length == 8) && + // '_' followed by a 2 character region designator + isalpha(scriptRegion[1]) && + isalpha(scriptRegion[2])) || + ((length == 4 || length == 9) && + // '_' followed by a 3-digit UN M.49 area code + isdigit(scriptRegion[1]) && + isdigit(scriptRegion[2]) && + isdigit(scriptRegion[3])) || + // '@' followed by a 4 character script code (already validated above) + (length == 5)); } - return NULL; + + return src; } char *setupMacOSXLocale(int cat) { diff --git a/jdk/src/java.base/macosx/native/libjava/java_props_macosx.h b/jdk/src/java.base/macosx/native/libjava/java_props_macosx.h index e5af6ff001c..db46d630fbe 100644 --- a/jdk/src/java.base/macosx/native/libjava/java_props_macosx.h +++ b/jdk/src/java.base/macosx/native/libjava/java_props_macosx.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,6 +26,7 @@ #include "java_props.h" char *setupMacOSXLocale(int cat); +const char *convertToPOSIXLocale(const char* src); void setOSNameAndVersion(java_props_t *sprops); void setUserHome(java_props_t *sprops); void setProxyProperties(java_props_t *sProps); diff --git a/jdk/src/java.base/unix/native/libjava/locale_str.h b/jdk/src/java.base/unix/native/libjava/locale_str.h index 684f796662a..e59e3c09e69 100644 --- a/jdk/src/java.base/unix/native/libjava/locale_str.h +++ b/jdk/src/java.base/unix/native/libjava/locale_str.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -211,6 +211,16 @@ static char *script_names[] = { "iqtelif", "Latn", "latin", "Latn", #endif + "Arab", "Arab", + "Cyrl", "Cyrl", + "Deva", "Deva", + "Ethi", "Ethi", + "Hans", "Hans", + "Hant", "Hant", + "Latn", "Latn", + "Sund", "Sund", + "Syrc", "Syrc", + "Tfng", "Tfng", "", "", };