8160199: Language's script should be reflected in user.script on Mac OS X

Reviewed-by: bchristi, sherman
This commit is contained in:
Naoto Sato 2017-07-03 12:54:24 -07:00
parent 0a79d06e97
commit f5388e93ad
3 changed files with 102 additions and 49 deletions

View File

@ -47,6 +47,7 @@ char *getPosixLocale(int cat) {
#define LOCALEIDLENGTH 128
char *getMacOSXLocale(int cat) {
const char* retVal = NULL;
char localeString[LOCALEIDLENGTH];
switch (cat) {
case LC_MESSAGES:
@ -74,73 +75,114 @@ char *getMacOSXLocale(int cat) {
}
CFRelease(languages);
retVal = languageString;
// Explicitly supply region, if there is none
char *hyphenPos = strchr(languageString, '-');
int langStrLen = strlen(languageString);
// Special case for Portuguese in Brazil:
// The language code needs the "_BR" region code (to distinguish it
// from Portuguese in Portugal), but this is missing when using the
// "Portuguese (Brazil)" language.
// If language is "pt" and the current locale is pt_BR, return pt_BR.
char localeString[LOCALEIDLENGTH];
if (strcmp(retVal, "pt") == 0 &&
CFStringGetCString(CFLocaleGetIdentifier(CFLocaleCopyCurrent()),
localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding()) &&
strcmp(localeString, "pt_BR") == 0) {
retVal = localeString;
if (hyphenPos == NULL || // languageString contains ISO639 only, e.g., "en"
languageString + langStrLen - hyphenPos == 5) { // ISO639-ScriptCode, e.g., "en-Latn"
CFStringGetCString(CFLocaleGetIdentifier(CFLocaleCopyCurrent()),
localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding());
char *underscorePos = strrchr(localeString, '_');
char *region = NULL;
if (underscorePos != NULL) {
region = underscorePos + 1;
}
if (region != NULL) {
strcat(languageString, "-");
strcat(languageString, region);
}
}
retVal = languageString;
}
break;
default:
{
char localeString[LOCALEIDLENGTH];
if (!CFStringGetCString(CFLocaleGetIdentifier(CFLocaleCopyCurrent()),
localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding())) {
return NULL;
}
retVal = localeString;
}
break;
}
if (retVal != NULL) {
// Language IDs use the language designators and (optional) region
// and script designators of BCP 47. So possible formats are:
//
// "en" (language designator only)
// "haw" (3-letter lanuage designator)
// "en-GB" (language with alpha-2 region designator)
// "es-419" (language with 3-digit UN M.49 area code)
// "zh-Hans" (language with ISO 15924 script designator)
// "zh-Hans-US" (language with ISO 15924 script designator and region)
// "zh-Hans-419" (language with ISO 15924 script designator and UN M.49)
//
// In the case of region designators (alpha-2 and/or UN M.49), we convert
// to our locale string format by changing '-' to '_'. That is, if
// the '-' is followed by fewer than 4 chars.
char* scriptOrRegion = strchr(retVal, '-');
if (scriptOrRegion != NULL) {
int length = strlen(scriptOrRegion);
if (length > 5) {
// Region and script both exist. Honor the script for now
scriptOrRegion[5] = '\0';
} else if (length < 5) {
*scriptOrRegion = '_';
return strdup(convertToPOSIXLocale(retVal));
}
assert((length == 3 &&
// '-' followed by a 2 character region designator
isalpha(scriptOrRegion[1]) &&
isalpha(scriptOrRegion[2])) ||
(length == 4 &&
// '-' followed by a 3-digit UN M.49 area code
isdigit(scriptOrRegion[1]) &&
isdigit(scriptOrRegion[2]) &&
isdigit(scriptOrRegion[3])));
}
return NULL;
}
/* Language IDs use the language designators and (optional) region
* and script designators of BCP 47. So possible formats are:
*
* "en" (language designator only)
* "haw" (3-letter lanuage designator)
* "en-GB" (language with alpha-2 region designator)
* "es-419" (language with 3-digit UN M.49 area code)
* "zh-Hans" (language with ISO 15924 script designator)
* "zh-Hans-US" (language with ISO 15924 script designator and region)
* "zh-Hans-419" (language with ISO 15924 script designator and UN M.49)
*
* convert these tags into POSIX conforming locale string, i.e.,
* lang{_region}{@script}. e.g., for "zh-Hans-US" into "zh_US@Hans"
*/
const char * convertToPOSIXLocale(const char* src) {
char* scriptRegion = strchr(src, '-');
if (scriptRegion != NULL) {
int length = strlen(scriptRegion);
char* region = strchr(scriptRegion + 1, '-');
char* atMark = NULL;
if (region == NULL) {
// CFLocaleGetIdentifier() returns '_' before region
region = strchr(scriptRegion + 1, '_');
}
return strdup(retVal);
*scriptRegion = '_';
if (length > 5) {
// Region and script both exist.
char tmpScript[4];
int regionLength = length - 6;
atMark = scriptRegion + 1 + regionLength;
memcpy(tmpScript, scriptRegion + 1, 4);
memmove(scriptRegion + 1, region + 1, regionLength);
memcpy(atMark + 1, tmpScript, 4);
} else if (length == 5) {
// script only
atMark = scriptRegion;
}
if (atMark != NULL) {
*atMark = '@';
// assert script code
assert(isalpha(atMark[1]) &&
isalpha(atMark[2]) &&
isalpha(atMark[3]) &&
isalpha(atMark[4]));
}
assert(((length == 3 || length == 8) &&
// '_' followed by a 2 character region designator
isalpha(scriptRegion[1]) &&
isalpha(scriptRegion[2])) ||
((length == 4 || length == 9) &&
// '_' followed by a 3-digit UN M.49 area code
isdigit(scriptRegion[1]) &&
isdigit(scriptRegion[2]) &&
isdigit(scriptRegion[3])) ||
// '@' followed by a 4 character script code (already validated above)
(length == 5));
}
return NULL;
return src;
}
char *setupMacOSXLocale(int cat) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -26,6 +26,7 @@
#include "java_props.h"
char *setupMacOSXLocale(int cat);
const char *convertToPOSIXLocale(const char* src);
void setOSNameAndVersion(java_props_t *sprops);
void setUserHome(java_props_t *sprops);
void setProxyProperties(java_props_t *sProps);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -211,6 +211,16 @@ static char *script_names[] = {
"iqtelif", "Latn",
"latin", "Latn",
#endif
"Arab", "Arab",
"Cyrl", "Cyrl",
"Deva", "Deva",
"Ethi", "Ethi",
"Hans", "Hans",
"Hant", "Hant",
"Latn", "Latn",
"Sund", "Sund",
"Syrc", "Syrc",
"Tfng", "Tfng",
"", "",
};