8160199: Language's script should be reflected in user.script on Mac OS X

Reviewed-by: bchristi, sherman
This commit is contained in:
Naoto Sato 2017-07-03 12:54:24 -07:00
parent 0a79d06e97
commit f5388e93ad
3 changed files with 102 additions and 49 deletions

View File

@ -47,6 +47,7 @@ char *getPosixLocale(int cat) {
#define LOCALEIDLENGTH 128 #define LOCALEIDLENGTH 128
char *getMacOSXLocale(int cat) { char *getMacOSXLocale(int cat) {
const char* retVal = NULL; const char* retVal = NULL;
char localeString[LOCALEIDLENGTH];
switch (cat) { switch (cat) {
case LC_MESSAGES: case LC_MESSAGES:
@ -74,73 +75,114 @@ char *getMacOSXLocale(int cat) {
} }
CFRelease(languages); CFRelease(languages);
retVal = languageString; // Explicitly supply region, if there is none
char *hyphenPos = strchr(languageString, '-');
int langStrLen = strlen(languageString);
// Special case for Portuguese in Brazil: if (hyphenPos == NULL || // languageString contains ISO639 only, e.g., "en"
// The language code needs the "_BR" region code (to distinguish it languageString + langStrLen - hyphenPos == 5) { // ISO639-ScriptCode, e.g., "en-Latn"
// from Portuguese in Portugal), but this is missing when using the CFStringGetCString(CFLocaleGetIdentifier(CFLocaleCopyCurrent()),
// "Portuguese (Brazil)" language. localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding());
// If language is "pt" and the current locale is pt_BR, return pt_BR. char *underscorePos = strrchr(localeString, '_');
char localeString[LOCALEIDLENGTH]; char *region = NULL;
if (strcmp(retVal, "pt") == 0 &&
CFStringGetCString(CFLocaleGetIdentifier(CFLocaleCopyCurrent()), if (underscorePos != NULL) {
localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding()) && region = underscorePos + 1;
strcmp(localeString, "pt_BR") == 0) { }
retVal = localeString;
if (region != NULL) {
strcat(languageString, "-");
strcat(languageString, region);
}
} }
retVal = languageString;
} }
break; break;
default: default:
{ {
char localeString[LOCALEIDLENGTH];
if (!CFStringGetCString(CFLocaleGetIdentifier(CFLocaleCopyCurrent()), if (!CFStringGetCString(CFLocaleGetIdentifier(CFLocaleCopyCurrent()),
localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding())) { localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding())) {
return NULL; return NULL;
} }
retVal = localeString; retVal = localeString;
} }
break; break;
} }
if (retVal != NULL) { if (retVal != NULL) {
// Language IDs use the language designators and (optional) region return strdup(convertToPOSIXLocale(retVal));
// and script designators of BCP 47. So possible formats are: }
//
// "en" (language designator only)
// "haw" (3-letter language designator)
// "en-GB" (language with alpha-2 region designator)
// "es-419" (language with 3-digit UN M.49 area code)
// "zh-Hans" (language with ISO 15924 script designator)
// "zh-Hans-US" (language with ISO 15924 script designator and region)
// "zh-Hans-419" (language with ISO 15924 script designator and UN M.49)
//
// In the case of region designators (alpha-2 and/or UN M.49), we convert
// to our locale string format by changing '-' to '_'. That is, if
// the '-' is followed by fewer than 4 chars.
char* scriptOrRegion = strchr(retVal, '-');
if (scriptOrRegion != NULL) {
int length = strlen(scriptOrRegion);
if (length > 5) {
// Region and script both exist. Honor the script for now
scriptOrRegion[5] = '\0';
} else if (length < 5) {
*scriptOrRegion = '_';
assert((length == 3 && return NULL;
// '-' followed by a 2 character region designator }
isalpha(scriptOrRegion[1]) &&
isalpha(scriptOrRegion[2])) || /* Language IDs use the language designators and (optional) region
(length == 4 && * and script designators of BCP 47. So possible formats are:
// '-' followed by a 3-digit UN M.49 area code *
isdigit(scriptOrRegion[1]) && * "en" (language designator only)
isdigit(scriptOrRegion[2]) && * "haw" (3-letter language designator)
isdigit(scriptOrRegion[3]))); * "en-GB" (language with alpha-2 region designator)
} * "es-419" (language with 3-digit UN M.49 area code)
* "zh-Hans" (language with ISO 15924 script designator)
* "zh-Hans-US" (language with ISO 15924 script designator and region)
* "zh-Hans-419" (language with ISO 15924 script designator and UN M.49)
*
* Convert these tags into a POSIX-conforming locale string, i.e.,
* lang{_region}{@script}; e.g., "zh-Hans-US" becomes "zh_US@Hans".
*/
const char * convertToPOSIXLocale(const char* src) {
char* scriptRegion = strchr(src, '-');
if (scriptRegion != NULL) {
int length = strlen(scriptRegion);
char* region = strchr(scriptRegion + 1, '-');
char* atMark = NULL;
if (region == NULL) {
// CFLocaleGetIdentifier() returns '_' before region
region = strchr(scriptRegion + 1, '_');
} }
return strdup(retVal); *scriptRegion = '_';
if (length > 5) {
// Region and script both exist.
char tmpScript[4];
int regionLength = length - 6;
atMark = scriptRegion + 1 + regionLength;
memcpy(tmpScript, scriptRegion + 1, 4);
memmove(scriptRegion + 1, region + 1, regionLength);
memcpy(atMark + 1, tmpScript, 4);
} else if (length == 5) {
// script only
atMark = scriptRegion;
}
if (atMark != NULL) {
*atMark = '@';
// assert script code
assert(isalpha(atMark[1]) &&
isalpha(atMark[2]) &&
isalpha(atMark[3]) &&
isalpha(atMark[4]));
}
assert(((length == 3 || length == 8) &&
// '_' followed by a 2 character region designator
isalpha(scriptRegion[1]) &&
isalpha(scriptRegion[2])) ||
((length == 4 || length == 9) &&
// '_' followed by a 3-digit UN M.49 area code
isdigit(scriptRegion[1]) &&
isdigit(scriptRegion[2]) &&
isdigit(scriptRegion[3])) ||
// '@' followed by a 4 character script code (already validated above)
(length == 5));
} }
return NULL;
return src;
} }
char *setupMacOSXLocale(int cat) { char *setupMacOSXLocale(int cat) {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -26,6 +26,7 @@
#include "java_props.h" #include "java_props.h"
char *setupMacOSXLocale(int cat); char *setupMacOSXLocale(int cat);
const char *convertToPOSIXLocale(const char* src);
void setOSNameAndVersion(java_props_t *sprops); void setOSNameAndVersion(java_props_t *sprops);
void setUserHome(java_props_t *sprops); void setUserHome(java_props_t *sprops);
void setProxyProperties(java_props_t *sProps); void setProxyProperties(java_props_t *sProps);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -211,6 +211,16 @@ static char *script_names[] = {
"iqtelif", "Latn", "iqtelif", "Latn",
"latin", "Latn", "latin", "Latn",
#endif #endif
"Arab", "Arab",
"Cyrl", "Cyrl",
"Deva", "Deva",
"Ethi", "Ethi",
"Hans", "Hans",
"Hant", "Hant",
"Latn", "Latn",
"Sund", "Sund",
"Syrc", "Syrc",
"Tfng", "Tfng",
"", "", "", "",
}; };