/*
 * Copyright 2008 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
|
||
|
|
||
|
/* @test
   @bug 4328178
   @summary Performs baseline and regression test on the ISCII91 charset
 */
|
||
|
|
||
|
import java.io.*;
|
||
|
|
||
|
/**
 * Baseline and regression test for the ISCII91 charset.
 *
 * <p>The test decodes single bytes and selected two-byte sequences with
 * {@code new String(byte[], "ISCII91")} and compares the resulting
 * characters against tables of expected Unicode values. Any mismatch, or an
 * unsupported charset, makes {@link #main} terminate with an exception.
 */
public class ISCIITest {

    /** Prints the generic failure banner to standard error. */
    private static void failureReport() {
        System.err.println ("Failed ISCII91 Regression Test");
    }

    /**
     * Verifies that every byte in {@code [start, end]} decodes to the
     * character with the same numeric value and encodes back to the same byte.
     *
     * @param start    first byte value to check (inclusive)
     * @param end      last byte value to check (inclusive)
     * @param testName label used in diagnostics
     * @throws Exception if a mapping differs, or (as
     *         {@code UnsupportedEncodingException}) if the charset is missing
     */
    private static void mapEquiv(int start,
                                 int end,
                                 String testName)
        throws Exception
    {
        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;

            // An unsupported charset is deliberately NOT caught here: the
            // caller must see it, otherwise this check would pass vacuously.
            String unicodeStr = new String(singleByte, "ISCII91");

            if (i != (int) unicodeStr.charAt(0)) {
                System.err.println("FAILED ISCII91 Regression test "
                                   + testName + ": input byte is " + i);
                throw new Exception("Failed ISCII91 Regression test: "
                                    + testName + ", input byte " + i);
            }

            byte[] encoded = unicodeStr.getBytes("ISCII91");
            if (encoded[0] != singleByte[0]) {
                System.err.println("Encoding error " + testName);
                throw new Exception("Failed ISCII91 Regression test");
            }
        }
    }

    /**
     * Verifies that every byte in {@code [start, end]} decodes to the
     * replacement character U+FFFD.
     *
     * @param start    first byte value to check (inclusive)
     * @param end      last byte value to check (inclusive)
     * @param testName label used in diagnostics
     * @throws Exception if a byte decodes to anything else, or (as
     *         {@code UnsupportedEncodingException}) if the charset is missing
     */
    private static void checkUnmapped(int start,
                                      int end,
                                      String testName)
        throws Exception {

        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;

            // As in mapEquiv, an unsupported charset propagates to the caller.
            String unicodeStr = new String(singleByte, "ISCII91");

            if (unicodeStr.charAt(0) != '\uFFFD') {
                System.err.println("FAILED " + testName
                                   + ": input byte is " + i);
                throw new Exception ("Failed ISCII91 regression test");
            }
        }
    }

    /**
     * Verifies that the bytes {@code start..end}, decoded one at a time,
     * yield the characters of {@code expectChars} in order.
     *
     * @param start       first byte value to check (inclusive)
     * @param end         last byte value to check (inclusive)
     * @param expectChars expected character for each byte; must contain
     *                    exactly {@code end - start + 1} entries
     * @param testName    label used in diagnostics
     * @throws IllegalArgumentException if the table size does not match the range
     * @throws Exception if a decoded character differs, or (as
     *         {@code UnsupportedEncodingException}) if the charset is missing
     */
    private static void checkRange(int start, int end,
                                   char[] expectChars,
                                   String testName)
        throws Exception {

        if (expectChars.length != end - start + 1) {
            throw new IllegalArgumentException(
                testName + ": expected-character table has "
                + expectChars.length + " entries for a range of "
                + (end - start + 1) + " bytes");
        }

        byte[] singleByte = new byte[1];

        for (int i = start; i <= end; i++) {
            singleByte[0] = (byte) i;
            String unicodeStr = new String(singleByte, "ISCII91");

            if (unicodeStr.charAt(0) != expectChars[i - start]) {
                System.err.println("FAILED " + testName
                                   + ": input byte is 0x"
                                   + Integer.toHexString(i));
                throw new Exception ("Failed ISCII91 Regression Test");
            }

            // Exercise the encoder through the "ISCII" charset name as well.
            // NOTE(review): the encoded bytes have never been compared with
            // the input byte; consider asserting the round trip here.
            unicodeStr.getBytes("ISCII");
        }
    }

    /**
     * Verifies the Nukta code-extension sequences.
     *
     * <p>ISCII uses a number of code extension techniques to access a number
     * of lesser used characters. The Nukta character, which ordinarily
     * signifies a diacritic, is used in combination with existing characters
     * to escape them to a different character value.
     *
     * @throws Exception if a sequence decodes to an unexpected first character
     */
    private static void checkNuktaExtensions() throws Exception {
        byte[] codeExtensionBytes = {
            (byte)0xa1 , (byte)0xe9,    // Chandrabindu + Nukta
                                        // => DEVANAGARI OM SIGN
            (byte)0xa6 , (byte)0xe9,    // Vowel I + Nukta
                                        // => DEVANAGARI VOCALIC L
            (byte)0xa7 , (byte)0xe9,    // Vowel II + Nukta
                                        // => DEVANAGARI VOCALIC LL
            (byte)0xaa , (byte)0xe9,    // Vowel RI + Nukta
                                        // => DEVANAGARI VOCALIC RR
            (byte)0xdb , (byte)0xe9,    // Vowel sign I + Nukta
                                        // => DEVANAGARI VOWEL SIGN VOCALIC L
            (byte)0xdc , (byte)0xe9,    // Vowel sign II + Nukta
                                        // => DEVANAGARI VOWEL SIGN VOCALIC LL
            (byte)0xdf , (byte)0xe9,    // Vowel sign Vocalic R + Nukta
                                        // => DEVANAGARI VOWEL SIGN VOCALIC RR
            (byte)0xea , (byte)0xe9     // Full stop/Phrase separator + Nukta
                                        // => DEVANAGARI SIGN AVAGRAHA
        };

        // One expected character per byte pair above, in the same order.
        char[] expectNuktaSub = {
            '\u0950',
            '\u090c',
            '\u0961',
            '\u0960',
            '\u0962',
            '\u0963',
            '\u0944',
            '\u093d'
        };

        byte[] bytePair = new byte[2];
        for (int i = 0; i < codeExtensionBytes.length / 2; i++) {
            bytePair[0] = codeExtensionBytes[2 * i];
            bytePair[1] = codeExtensionBytes[2 * i + 1];

            String unicodeStr = new String(bytePair, "ISCII91");
            if (unicodeStr.charAt(0) != expectNuktaSub[i]) {
                throw new Exception("Failed Nukta Sub");
            }
        }
    }

    /**
     * Verifies the two-byte sequences that start with Halant: each must
     * decode to exactly the expected pair of characters.
     *
     * @throws Exception if either decoded character of a pair is wrong
     */
    private static void checkHalantCombinations() throws Exception {
        byte[] comboBytes = {
            (byte)0xe8 , (byte)0xe8,    // HALANT + HALANT
            (byte)0xe8 , (byte)0xe9     // HALANT + NUKTA aka. Soft Halant
        };

        // Two expected characters per byte pair above, in the same order.
        char[] expectCombChars = {
            '\u094d',
            '\u200c',
            '\u094d',
            '\u200d'
        };

        byte[] bytePair = new byte[2];
        for (int i = 0; i < comboBytes.length / 2; i++) {
            bytePair[0] = comboBytes[2 * i];
            bytePair[1] = comboBytes[2 * i + 1];

            String unicodeStr = new String(bytePair, "ISCII91");

            // A mismatch in EITHER position is a failure. The earlier form
            // joined the two comparisons with &&, which let a result with
            // only one wrong character slip through.
            if (unicodeStr.length() < 2
                || unicodeStr.charAt(0) != expectCombChars[2 * i]
                || unicodeStr.charAt(1) != expectCombChars[2 * i + 1]) {
                throw new Exception("Failed ISCII91 Regression Test");
            }
        }
    }

    /**
     * Tests the ISCII91 Indic character encoding
     * as per IS 13194:1991 Bureau of Indian Standards.
     *
     * @throws Exception if any check fails or the charset is not supported
     */
    private static void test () throws Exception {

        try {

            // ISCII91 is an 8-bit encoding which retains the ASCII
            // mappings in the lower half.

            mapEquiv(0, 0x7f, "7 bit ASCII range");

            // Checks a range of characters which are unmappable according
            // to the standards.

            checkUnmapped(0x81, 0x9f, "UNMAPPED");

            // Vowel Modifier chars can be used to modify the vowel
            // sound of the preceding consonant, vowel or matra character.

            char[] vowelModChars = {
                '\u0901', // Vowel modifier Chandrabindu
                '\u0902', // Vowel modifier Anuswar
                '\u0903'  // Vowel modifier Visarg
            };

            checkRange(0xa1, 0xa3, vowelModChars, "INDIC VOWEL MODIFIER CHARS");

            char[] expectChars = {
                '\u0905', // a4 -- Vowel A
                '\u0906', // a5 -- Vowel AA
                '\u0907', // a6 -- Vowel I
                '\u0908', // a7 -- Vowel II
                '\u0909', // a8 -- Vowel U
                '\u090a', // a9 -- Vowel UU
                '\u090b', // aa -- Vowel RI
                '\u090e', // ab -- Vowel E ( Southern Scripts )
                '\u090f', // ac -- Vowel EY
                '\u0910', // ad -- Vowel AI
                '\u090d', // ae -- Vowel AYE ( Devanagari Script )
                '\u0912', // af -- Vowel O ( Southern Scripts )
                '\u0913', // b0 -- Vowel OW
                '\u0914', // b1 -- Vowel AU
                '\u0911', // b2 -- Vowel AWE ( Devanagari Script )
            };

            checkRange(0xa4, 0xb2, expectChars, "INDIC VOWELS");

            char[] expectConsChars =
            {
                '\u0915', // b3 -- Consonant KA
                '\u0916', // b4 -- Consonant KHA
                '\u0917', // b5 -- Consonant GA
                '\u0918', // b6 -- Consonant GHA
                '\u0919', // b7 -- Consonant NGA
                '\u091a', // b8 -- Consonant CHA
                '\u091b', // b9 -- Consonant CHHA
                '\u091c', // ba -- Consonant JA
                '\u091d', // bb -- Consonant JHA
                '\u091e', // bc -- Consonant JNA
                '\u091f', // bd -- Consonant Hard TA
                '\u0920', // be -- Consonant Hard THA
                '\u0921', // bf -- Consonant Hard DA
                '\u0922', // c0 -- Consonant Hard DHA
                '\u0923', // c1 -- Consonant Hard NA
                '\u0924', // c2 -- Consonant Soft TA
                '\u0925', // c3 -- Consonant Soft THA
                '\u0926', // c4 -- Consonant Soft DA
                '\u0927', // c5 -- Consonant Soft DHA
                '\u0928', // c6 -- Consonant Soft NA
                '\u0929', // c7 -- Consonant NA ( Tamil )
                '\u092a', // c8 -- Consonant PA
                '\u092b', // c9 -- Consonant PHA
                '\u092c', // ca -- Consonant BA
                '\u092d', // cb -- Consonant BHA
                '\u092e', // cc -- Consonant MA
                '\u092f', // cd -- Consonant YA
                '\u095f', // ce -- Consonant JYA ( Bengali, Assamese & Oriya )
                '\u0930', // cf -- Consonant RA
                '\u0931', // d0 -- Consonant Hard RA ( Southern Scripts )
                '\u0932', // d1 -- Consonant LA
                '\u0933', // d2 -- Consonant Hard LA
                '\u0934', // d3 -- Consonant ZHA ( Tamil & Malayalam )
                '\u0935', // d4 -- Consonant VA
                '\u0936', // d5 -- Consonant SHA
                '\u0937', // d6 -- Consonant Hard SHA
                '\u0938', // d7 -- Consonant SA
                '\u0939', // d8 -- Consonant HA
            };

            checkRange(0xb3, 0xd8, expectConsChars, "INDIC CONSONANTS");

            char[] matraChars = {
                '\u093e', // da -- Vowel Sign AA
                '\u093f', // db -- Vowel Sign I
                '\u0940', // dc -- Vowel Sign II
                '\u0941', // dd -- Vowel Sign U
                '\u0942', // de -- Vowel Sign UU
                '\u0943', // df -- Vowel Sign RI
                '\u0946', // e0 -- Vowel Sign E ( Southern Scripts )
                '\u0947', // e1 -- Vowel Sign EY
                '\u0948', // e2 -- Vowel Sign AI
                '\u0945', // e3 -- Vowel Sign AYE ( Devanagari Script )
                '\u094a', // e4 -- Vowel Sign O ( Southern Scripts )
                '\u094b', // e5 -- Vowel Sign OW
                '\u094c', // e6 -- Vowel Sign AU
                '\u0949'  // e7 -- Vowel Sign AWE ( Devanagari Script )
            };

            // Matras or Vowel signs alter the implicit
            // vowel sound associated with an Indic consonant.

            checkRange(0xda, 0xe7, matraChars, "INDIC MATRAS");

            char[] loneContextModifierChars = {
                '\u094d', // e8 -- Vowel Omission Sign ( Halant )
                '\u093c', // e9 -- Diacritic Sign ( Nukta )
                '\u0964'  // ea -- Full Stop ( Viram, Northern Scripts )
            };

            checkRange(0xe8, 0xea,
                       loneContextModifierChars, "LONE INDIC CONTEXT CHARS");

            // Test Indic script numeral chars
            // (as opposed to international numerals)

            char[] expectNumeralChars =
            {
                '\u0966', // f1 -- Digit 0
                '\u0967', // f2 -- Digit 1
                '\u0968', // f3 -- Digit 2
                '\u0969', // f4 -- Digit 3
                '\u096a', // f5 -- Digit 4
                '\u096b', // f6 -- Digit 5
                '\u096c', // f7 -- Digit 6
                '\u096d', // f8 -- Digit 7
                '\u096e', // f9 -- Digit 8
                '\u096f'  // fa -- Digit 9
            };

            checkRange(0xf1, 0xfa,
                       expectNumeralChars, "NUMERAL/DIGIT CHARACTERS");

            // Two-byte sequences whose meaning depends on the preceding byte.
            checkNuktaExtensions();
            checkHalantCombinations();

        } catch (UnsupportedEncodingException e) {
            System.err.println ("ISCII91 encoding not supported");
            failureReport();
            // Keep the original exception as the cause for diagnosis.
            throw new Exception ("Failed ISCII91 Regression Test", e);
        }
    }

    /**
     * Test entry point.
     *
     * @param args ignored
     * @throws Exception if the regression test fails
     */
    public static void main (String[] args) throws Exception {
        test();
    }
}
|