/*
 * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
/*
 * @test
 * @bug  4221795 8032446 8174270
 * @summary Confirm Normalizer's fundamental behavior. Imported from ICU4J 3.2's
 * src/com/ibm/icu/dev/test and modified.
 * @modules java.base/sun.text java.base/jdk.internal.icu.text
 * @library /java/text/testlib
 * @compile -XDignore.symbol.file ICUBasicTest.java
 * @run main/timeout=30 ICUBasicTest
 */

/*
 *******************************************************************************
 * Copyright (C) 1996-2004, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

import sun.text.Normalizer;
import jdk.internal.icu.text.NormalizerBase;

import java.util.HexFormat;

import static java.text.Normalizer.Form.*;

public class ICUBasicTest extends IntlTest {

    public static void main(String[] args) throws Exception {
        new ICUBasicTest().run(args);
    }

    /*
     * Normalization modes
     */
    private static final NormalizerBase.Mode NFCmode  = NormalizerBase.NFC;
    private static final NormalizerBase.Mode NFDmode  = NormalizerBase.NFD;
    private static final NormalizerBase.Mode NFKCmode = NormalizerBase.NFKC;
    private static final NormalizerBase.Mode NFKDmode = NormalizerBase.NFKD;
    private static final NormalizerBase.Mode NONEmode = NormalizerBase.NONE;

    /*
     * Normalization options
     */

    /* Normal Unicode versions */
    private static final int UNICODE_3_2_0  = Normalizer.UNICODE_3_2;
    private static final int UNICODE_LATEST = NormalizerBase.UNICODE_LATEST;

    /*
     * Special cases for the UAX #15 bug; see Unicode Public Review Issue #29
     * at http://www.unicode.org/review/resolved-pri.html#pri29
     *
     * Note:
     *   The PRI #29 fix is incorporated as of Unicode 4.1.0, so the expected
     *   results differ for earlier Unicode versions.
     */
    public void TestComposition() {

        final TestCompositionCase[] cases = new TestCompositionCase[] {
            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\u1100\u0300\u1161\u0327",
                "\u1100\u0300\u1161\u0327"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\u1100\u0300\u1161\u0327",
                "\u1100\u0300\u1161\u0327"),

            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\u1100\u0300\u1161\u0327\u11a8",
                "\u1100\u0300\u1161\u0327\u11a8"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\u1100\u0300\u1161\u0327\u11a8",
                "\u1100\u0300\u1161\u0327\u11a8"),

            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\uac00\u0300\u0327\u11a8",
                "\uac00\u0327\u0300\u11a8"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\uac00\u0300\u0327\u11a8",
                "\uac00\u0327\u0300\u11a8"),

            new TestCompositionCase(NFC, UNICODE_3_2_0,
                "\u0b47\u0300\u0b3e",
                "\u0b47\u0300\u0b3e"),
            new TestCompositionCase(NFC, UNICODE_LATEST,
                "\u0b47\u0300\u0b3e",
                "\u0b47\u0300\u0b3e"),
        };

        for (int i = 0; i < cases.length; ++i) {
            String output = Normalizer.normalize(cases[i].input,
                                                 cases[i].form, cases[i].options);
            if (!output.equals(cases[i].expect)) {
                errln("unexpected result for case " + i + ". Expected="
                      + cases[i].expect + ", Actual=" + output);
            } else if (verbose) {
                logln("expected result for case " + i + ". Expected="
                      + cases[i].expect + ", Actual=" + output);
            }
        }
    }
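
    /*
     * Illustrative only (a minimal sketch, not invoked by run()): without
     * the intervening U+0300 in the cases above, conjoining Jamo compose
     * algorithmically, which is what makes those sequences interesting
     * under the PRI #29 rules.
     */
    private void composeJamoExample() {
        // U+1100 (choseong kiyeok) + U+1161 (jungseong a) -> U+AC00 (GA)
        String composed = java.text.Normalizer.normalize("\u1100\u1161", NFC);
        logln("NFC(U+1100 U+1161) = " + composed);   // expected: "\uac00"
    }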

    private static final class TestCompositionCase {
        public java.text.Normalizer.Form form;
        public int options;
        public String input, expect;

        TestCompositionCase(java.text.Normalizer.Form form,
                            int options,
                            String input,
                            String expect) {
            this.form    = form;
            this.options = options;
            this.input   = input;
            this.expect  = expect;
        }
    }

    /*
     * Added in order to detect a regression.
     */
    public void TestCombiningMarks() {
        String src      = "\u0f71\u0f72\u0f73\u0f74\u0f75";
        String expected = "\u0F71\u0F71\u0F71\u0F72\u0F72\u0F74\u0F74";
        String result   = NormalizerBase.normalize(src, NFD);

        if (!expected.equals(result)) {
            errln("Reordering of combining marks failed. Expected: " +
                  HexFormat.of().withDelimiter(" ").formatHex(expected.getBytes())
                    + " Got: " + HexFormat.of().withDelimiter(" ").formatHex(result.getBytes()));
        }
    }
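
    /*
     * Illustrative only (a minimal sketch, not invoked by run()): the
     * HexFormat dumps in this file show the string's bytes in the default
     * charset; when debugging normalization it is often clearer to dump
     * code points. For the test above, U+0F73 and U+0F75 decompose
     * canonically to U+0F71 U+0F72 and U+0F71 U+0F74, and a stable sort by
     * canonical combining class (129, 130, 132) yields the expected string.
     */
    private static String codePointsOf(String s) {
        // Render each code point as U+XXXX, space separated,
        // e.g. codePointsOf("\u0f73") returns "U+0F73".
        return s.codePoints()
                .mapToObj(cp -> String.format("U+%04X", cp))
                .collect(java.util.stream.Collectors.joining(" "));
    }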

    /*
     * Added in order to detect a regression.
     */
    public void TestBengali() throws Exception {
        String input = "\u09bc\u09be\u09cd\u09be";
        String output = NormalizerBase.normalize(input, NFC);

        if (!input.equals(output)) {
            errln("ERROR in NFC of string");
        }
    }

    /**
     * Added in order to detect a regression.
     *
     * Test for a problem found by Verisign.  The problem is that characters
     * at the start of a string are not put into canonical order correctly
     * by compose() if there is no starter.
     */
    public void TestVerisign() throws Exception {
        String[] inputs = {
            "\u05b8\u05b9\u05b1\u0591\u05c3\u05b0\u05ac\u059f",
            "\u0592\u05b7\u05bc\u05a5\u05b0\u05c0\u05c4\u05ad"
        };
        String[] outputs = {
            "\u05b1\u05b8\u05b9\u0591\u05c3\u05b0\u05ac\u059f",
            "\u05b0\u05b7\u05bc\u05a5\u0592\u05c0\u05ad\u05c4"
        };

        for (int i = 0; i < inputs.length; ++i) {
            String input = inputs[i];
            String output = outputs[i];

            String result = NormalizerBase.normalize(input, NFD);
            if (!result.equals(output)) {
                errln("FAIL input: " + HexFormat.of().withDelimiter(" ")
                        .formatHex(input.getBytes()) + "\n" +
                      " decompose: " + HexFormat.of().withDelimiter(" ")
                        .formatHex(result.getBytes()) + "\n" +
                      "  expected: " + HexFormat.of().withDelimiter(" ")
                        .formatHex(output.getBytes()));
            }

            result = NormalizerBase.normalize(input, NFC);
            if (!result.equals(output)) {
                errln("FAIL input: " + HexFormat.of().withDelimiter(" ")
                        .formatHex(input.getBytes()) + "\n" +
                      "   compose: " + HexFormat.of().withDelimiter(" ")
                        .formatHex(result.getBytes()) + "\n" +
                      "  expected: " + HexFormat.of().withDelimiter(" ")
                        .formatHex(output.getBytes()));
            }
        }
    }

    /**
     * Test for a problem that showed up just before the ICU 1.6 release,
     * having to do with combining characters whose index is zero.
     * Such characters do not participate in any canonical
     * decompositions.  However, having an index of zero means that
     * they all share one typeMask[] entry, that is, they all have to
     * map to the same canonical class, which is not the case in
     * reality.
     */
    public void TestZeroIndex() throws Exception {
        String[] DATA = {
            // Expect col1 x COMPOSE_COMPAT => col2
            // Expect col2 x DECOMP => col3
            "A\u0316\u0300", "\u00C0\u0316", "A\u0316\u0300",
            "A\u0300\u0316", "\u00C0\u0316", "A\u0316\u0300",
            "A\u0327\u0300", "\u00C0\u0327", "A\u0327\u0300",
            "c\u0321\u0327", "c\u0321\u0327", "c\u0321\u0327",
            "c\u0327\u0321", "\u00E7\u0321", "c\u0327\u0321",
        };

        for (int i = 0; i < DATA.length; i += 3) {
            String a = DATA[i];
            String b = NormalizerBase.normalize(a, NFKC);
            String exp = DATA[i+1];

            if (b.equals(exp)) {
                logln("Ok: " + HexFormat.of().withDelimiter(" ")
                        .formatHex(a.getBytes()) + " x COMPOSE_COMPAT => " +
                      HexFormat.of().withDelimiter(" ")
                              .formatHex(b.getBytes()));
            } else {
                errln("FAIL: " + HexFormat.of().withDelimiter(" ")
                        .formatHex(a.getBytes()) + " x COMPOSE_COMPAT => " +
                      HexFormat.of().withDelimiter(" ")
                              .formatHex(b.getBytes()) + ", expect " +
                      HexFormat.of().withDelimiter(" ")
                              .formatHex(exp.getBytes()));
            }

            a = NormalizerBase.normalize(b, NFD);
            exp = DATA[i+2];
            if (a.equals(exp)) {
                logln("Ok: " + HexFormat.of().withDelimiter(" ").formatHex(b.getBytes()) + " x DECOMP => " +
                      HexFormat.of().withDelimiter(" ").formatHex(a.getBytes()));
            } else {
                errln("FAIL: " + HexFormat.of().withDelimiter(" ").formatHex(b.getBytes()) + " x DECOMP => " +
                      HexFormat.of().withDelimiter(" ").formatHex(a.getBytes()) + ", expect " +
                      HexFormat.of().withDelimiter(" ").formatHex(exp.getBytes()));
            }
        }
    }
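
    /*
     * A minimal sketch of the canonical-ordering rule the data above
     * exercises (illustrative only, not invoked by run()): a combining mark
     * composes with the starter unless an intervening mark has a combining
     * class greater than or equal to its own.
     */
    private void blockingExample() {
        // U+0316 (ccc 220, below) does not block U+0300 (ccc 230, above),
        // so the grave accent still composes with the base letter:
        logln(java.text.Normalizer.normalize("A\u0316\u0300", NFC)); // "\u00C0\u0316"
        // U+0321 and U+0327 both have ccc 202, so the cedilla is blocked
        // and nothing composes:
        logln(java.text.Normalizer.normalize("c\u0321\u0327", NFC)); // unchanged
    }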

    /**
     * Make sure characters in the CompositionExclusion.txt list do not get
     * composed to.
     */
    public void TestCompositionExclusion() throws Exception {
        // This list is generated from CompositionExclusion.txt.
        // Update whenever the normalizer tables are updated.  Note
        // that we test all characters listed, even those that can be
        // derived from the Unicode DB and are therefore commented
        // out.

        /*
         * kyuka's note:
         *   The original data appears to have been based on Unicode 3.0.0
         *   (the initial Composition Exclusions list) and contained some
         *   mistakes. It was updated to correct those mistakes and to
         *   support Unicode 4.0.0. The table is also valid for Unicode 3.2.0.
         */
        String[][] EXCLUDED_UNICODE_3_2_0 = {
            {"\u0340"},
            {"\u0341"},
            {"\u0343"},
            {"\u0344"},
            {"\u0374"},
            {"\u037E"},
            {"\u0387"},
            {"\u0958"},
            {"\u0959", "\u095F"},
            {"\u09DC"},
            {"\u09DD"},
            {"\u09DF"},
            {"\u0A33"},
            {"\u0A36"},
            {"\u0A59", "\u0A5B"},
            {"\u0A5E"},
            {"\u0B5C"},
            {"\u0B5D"},
            {"\u0F43"},
            {"\u0F4D"},
            {"\u0F52"},
            {"\u0F57"},
            {"\u0F5C"},
            {"\u0F69"},
            {"\u0F73"},
            {"\u0F75"},
            {"\u0F76"},
            {"\u0F78"},
            {"\u0F81"},
            {"\u0F93"},
            {"\u0F9D"},
            {"\u0FA2"},
            {"\u0FA7"},
            {"\u0FAC"},
            {"\u0FB9"},
            {"\u1F71"},
            {"\u1F73"},
            {"\u1F75"},
            {"\u1F77"},
            {"\u1F79"},
            {"\u1F7B"},
            {"\u1F7D"},
            {"\u1FBB"},
            {"\u1FBE"},
            {"\u1FC9"},
            {"\u1FCB"},
            {"\u1FD3"},
            {"\u1FDB"},
            {"\u1FE3"},
            {"\u1FEB"},
            {"\u1FEE"},
            {"\u1FEF"},
            {"\u1FF9"},
            {"\u1FFB"},
            {"\u1FFD"},
            {"\u2000"},
            {"\u2001"},
            {"\u2126"},
            {"\u212A"},
            {"\u212B"},
            {"\u2329"},
            {"\u232A"},
            {"\u2ADC"},
            {"\uF900", "\uFA0D"},
            {"\uFA10"},
            {"\uFA12"},
            {"\uFA15", "\uFA1E"},
            {"\uFA20"},
            {"\uFA22"},
            {"\uFA25"},
            {"\uFA26"},
            {"\uFA2A", "\uFA2D"},
            {"\uFA30", "\uFA6A"},
            {"\uFB1D"},
            {"\uFB1F"},
            {"\uFB2A", "\uFB36"},
            {"\uFB38", "\uFB3C"},
            {"\uFB3E"},
            {"\uFB40"},
            {"\uFB41"},
            {"\uFB43"},
            {"\uFB44"},
            {"\uFB46", "\uFB4E"},
            {"\uD834\uDD5E", "\uD834\uDD64"},
            {"\uD834\uDDBB", "\uD834\uDDC0"},
            {"\uD87E\uDC00", "\uD87E\uDE1D"}
        };

        String[][] EXCLUDED_LATEST = {};

        for (int i = 0; i < EXCLUDED_UNICODE_3_2_0.length; ++i) {
            if (EXCLUDED_UNICODE_3_2_0[i].length == 1) {
                checkCompositionExclusion_320(EXCLUDED_UNICODE_3_2_0[i][0]);
            } else {
                int from = Character.codePointAt(EXCLUDED_UNICODE_3_2_0[i][0], 0);
                int to   = Character.codePointAt(EXCLUDED_UNICODE_3_2_0[i][1], 0);

                for (int j = from; j <= to; j++) {
                    checkCompositionExclusion_320(String.valueOf(Character.toChars(j)));
                }
            }
        }
    }

    private void checkCompositionExclusion_320(String a) throws Exception {
        String b = NormalizerBase.normalize(a, NFKD);
        String c = NormalizerBase.normalize(b, NFC);

        if (c.equals(a)) {
            errln("FAIL: " + HexFormat.of().withDelimiter(" ")
                    .formatHex(a.getBytes()) + " x DECOMP_COMPAT => " +
                  HexFormat.of().withDelimiter(" ")
                          .formatHex(b.getBytes()) + " x COMPOSE => " +
                  HexFormat.of().withDelimiter(" ")
                          .formatHex(c.getBytes()) + " for the latest Unicode");
        } else if (verbose) {
            logln("Ok: " + HexFormat.of().withDelimiter(" ")
                    .formatHex(a.getBytes()) + " x DECOMP_COMPAT => " +
                  HexFormat.of().withDelimiter(" ")
                          .formatHex(b.getBytes()) + " x COMPOSE => " +
                  HexFormat.of().withDelimiter(" ")
                          .formatHex(c.getBytes()) + " for the latest Unicode");
        }

        b = NormalizerBase.normalize(a, NFKD, Normalizer.UNICODE_3_2);
        c = NormalizerBase.normalize(b, NFC, Normalizer.UNICODE_3_2);
        if (c.equals(a)) {
            errln("FAIL: " + HexFormat.of().withDelimiter(" ")
                    .formatHex(a.getBytes()) + " x DECOMP_COMPAT => " +
                  HexFormat.of().withDelimiter(" ").formatHex(b.getBytes()) + " x COMPOSE => " +
                  HexFormat.of().withDelimiter(" ").formatHex(c.getBytes()) + " for Unicode 3.2.0");
        } else if (verbose) {
            logln("Ok: " + HexFormat.of().withDelimiter(" ")
                    .formatHex(a.getBytes()) + " x DECOMP_COMPAT => " +
                  HexFormat.of().withDelimiter(" ").formatHex(b.getBytes()) + " x COMPOSE => " +
                  HexFormat.of().withDelimiter(" ").formatHex(c.getBytes()) + " for Unicode 3.2.0");
        }
    }
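
    /*
     * A concrete instance of the behavior checked above (illustrative only,
     * not invoked by run()): U+212B ANGSTROM SIGN has a singleton canonical
     * decomposition and is excluded from composition, so NFC never composes
     * back to it.
     */
    private void angstromExample() {
        String nfd = java.text.Normalizer.normalize("\u212b", NFD); // "A\u030a"
        String nfc = java.text.Normalizer.normalize(nfd, NFC);
        logln("U+212B recomposes to " + nfc);  // "\u00c5" (A-ring), not "\u212b"
    }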

    public void TestTibetan() throws Exception {
        String[][] decomp = {
            { "\u0f77", "\u0f77", "\u0fb2\u0f71\u0f80" }
        };
        String[][] compose = {
            { "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80" }
        };

        staticTest(NFD,  decomp,  1);
        staticTest(NFKD, decomp,  2);
        staticTest(NFC,  compose, 1);
        staticTest(NFKC, compose, 2);
    }

    public void TestExplodingBase() throws Exception {
        // \u017f - Latin small letter long s
        // \u0307 - combining dot above
        // \u1e61 - Latin small letter s with dot above
        // \u1e9b - Latin small letter long s with dot above
        String[][] canon = {
            // Input                Decomposed              Composed
            { "Tschu\u017f",        "Tschu\u017f",          "Tschu\u017f"    },
            { "Tschu\u1e9b",        "Tschu\u017f\u0307",    "Tschu\u1e9b"    },
        };
        String[][] compat = {
            // Input                Decomposed              Composed
            { "\u017f",             "s",                    "s"           },
            { "\u1e9b",             "s\u0307",              "\u1e61"      },
        };

        staticTest(NFD,  canon,  1);
        staticTest(NFC,  canon,  2);
        staticTest(NFKD, compat, 1);
        staticTest(NFKC, compat, 2);
    }
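
    /*
     * Illustrative only (not invoked by run()): the long s folds to a plain
     * "s" only under compatibility normalization; the canonical forms above
     * must leave U+017F untouched.
     */
    private void longSExample() {
        logln(java.text.Normalizer.normalize("\u017f", NFD));  // still "\u017f"
        logln(java.text.Normalizer.normalize("\u017f", NFKD)); // "s"
    }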

    private String[][] canonTests = {
        // Input                Decomposed              Composed

        { "cat",                "cat",                  "cat"               },
        { "\u00e0ardvark",      "a\u0300ardvark",       "\u00e0ardvark",    },

        // D-dot_above
        { "\u1e0a",             "D\u0307",              "\u1e0a"            },

        // D dot_above
        { "D\u0307",            "D\u0307",              "\u1e0a"            },

        // D-dot_below dot_above
        { "\u1e0c\u0307",       "D\u0323\u0307",        "\u1e0c\u0307"      },

        // D-dot_above dot_below
        { "\u1e0a\u0323",       "D\u0323\u0307",        "\u1e0c\u0307"      },

        // D dot_below dot_above
        { "D\u0307\u0323",      "D\u0323\u0307",        "\u1e0c\u0307"      },

        // D dot_below cedilla dot_above
        { "\u1e10\u0307\u0323", "D\u0327\u0323\u0307",  "\u1e10\u0323\u0307"},

        // D dot_above ogonek dot_below
        { "D\u0307\u0328\u0323","D\u0328\u0323\u0307",  "\u1e0c\u0328\u0307"},

        // E-macron-grave
        { "\u1E14",             "E\u0304\u0300",        "\u1E14"            },

        // E-macron + grave
        { "\u0112\u0300",       "E\u0304\u0300",        "\u1E14"            },

        // E-grave + macron
        { "\u00c8\u0304",       "E\u0300\u0304",        "\u00c8\u0304"      },

        // angstrom_sign
        { "\u212b",             "A\u030a",              "\u00c5"            },

        // A-ring
        { "\u00c5",             "A\u030a",              "\u00c5"            },
        { "\u00c4ffin",         "A\u0308ffin",          "\u00c4ffin"        },
        { "\u00c4\uFB03n",      "A\u0308\uFB03n",       "\u00c4\uFB03n"     },

        // updated with Unicode 3.0
        { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        },
        { "\u00fd\uFB03n",      "y\u0301\uFB03n",       "\u00fd\uFB03n"     },

        { "Henry IV",           "Henry IV",             "Henry IV"          },
        { "Henry \u2163",       "Henry \u2163",         "Henry \u2163"      },

        // ga(Zenkaku-Katakana)
        { "\u30AC",             "\u30AB\u3099",         "\u30AC"            },

        // ka(Zenkaku-Katakana) + ten(Zenkaku)
        { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            },

        // ka(Hankaku-Katakana) + ten(Hankaku-Katakana)
        { "\uFF76\uFF9E",       "\uFF76\uFF9E",         "\uFF76\uFF9E"      },

        // ka(Zenkaku-Katakana) + ten(Hankaku)
        { "\u30AB\uFF9E",       "\u30AB\uFF9E",         "\u30AB\uFF9E"      },
        // ka(Hankaku-Katakana) + ten(Zenkaku)
        { "\uFF76\u3099",       "\uFF76\u3099",         "\uFF76\u3099"      },

        { "A\u0300\u0316",      "A\u0316\u0300",        "\u00C0\u0316"      },

        { "\ud834\udd5e\ud834\udd57\ud834\udd65\ud834\udd5e",
          "\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65",
          "\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65" },
    };

    private String[][] compatTests = {
        // Input                Decomposed              Composed

        { "cat",                "cat",                  "cat"               },

        // Alef-Lamed vs. Alef, Lamed
        { "\uFB4f",             "\u05D0\u05DC",         "\u05D0\u05DC",     },

        { "\u00C4ffin",         "A\u0308ffin",          "\u00C4ffin"        },

        // ffi ligature -> f + f + i
        { "\u00C4\uFB03n",      "A\u0308ffin",          "\u00C4ffin"        },

        // updated for Unicode 3.0
        { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        },

        // ffi ligature -> f + f + i
        { "\u00fd\uFB03n",      "y\u0301ffin",          "\u00fdffin"        },

        { "Henry IV",           "Henry IV",             "Henry IV"          },
        { "Henry \u2163",       "Henry IV",             "Henry IV"          },

        // ga(Zenkaku-Katakana)
        { "\u30AC",             "\u30AB\u3099",         "\u30AC"            },

        // ka(Zenkaku-Katakana) + ten(Zenkaku)
        { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            },

        // ka(Hankaku-Katakana) + ten(Zenkaku)
        { "\uFF76\u3099",       "\u30AB\u3099",         "\u30AC"            },

        /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */
        // ka(Hankaku-Katakana) + ten(Hankaku)
        { "\uFF76\uFF9E",       "\u30AB\u3099",         "\u30AC"            },

        // ka(Zenkaku-Katakana) + ten(Hankaku)
        { "\u30AB\uFF9E",       "\u30AB\u3099",         "\u30AC"            },
    };

    public void TestNFD() throws Exception {
        staticTest(NFD, canonTests, 1);
    }

    public void TestNFC() throws Exception {
        staticTest(NFC, canonTests, 2);
    }

    public void TestNFKD() throws Exception {
        staticTest(NFKD, compatTests, 1);
    }

    public void TestNFKC() throws Exception {
        staticTest(NFKC, compatTests, 2);
    }

    private void staticTest(java.text.Normalizer.Form form,
                            String[][] tests,
                            int outCol) throws Exception {
        for (int i = 0; i < tests.length; i++) {
            String input = tests[i][0];
            logln("Normalizing '" + input + "' (" + HexFormat.of()
                    .withDelimiter(" ").formatHex(input.getBytes()) + ")");

            String expect = tests[i][outCol];
            String output = java.text.Normalizer.normalize(input, form);

            if (!output.equals(expect)) {
                errln("FAIL: case " + i
                    + " expected '" + expect + "' (" + HexFormat.of()
                        .withDelimiter(" ").formatHex(expect.getBytes()) + ")"
                    + " but got '" + output + "' (" + HexFormat.of()
                        .withDelimiter(" ").formatHex(output.getBytes()) + ")");
            }
        }
    }

    // With canonical decomposition, Hangul syllables should get decomposed
    // into Jamo, but Jamo characters should not be decomposed into
    // conjoining Jamo (see the arithmetic sketch after the table below).
    private String[][] hangulCanon = {
        // Input                Decomposed              Composed
        { "\ud4db",             "\u1111\u1171\u11b6",   "\ud4db"        },
        { "\u1111\u1171\u11b6", "\u1111\u1171\u11b6",   "\ud4db"        },
    };
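
    /*
     * A sketch of the algorithmic Hangul mapping behind the rows above,
     * using the standard constants from the Unicode Standard, chapter 3
     * (stated here as an illustration; the test itself relies on the
     * runtime's own tables). For L=U+1111, V=U+1171, T=U+11B6:
     *   0xAC00 + 17*588 + 16*28 + 15 = 0xD4DB
     */
    private static int composeHangul(char l, char v, char t) {
        final int SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7;
        final int VCount = 21, TCount = 28;  // 21 vowels; 27 trailing consonants + "none"
        return SBase + ((l - LBase) * VCount + (v - VBase)) * TCount + (t - TBase);
    }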

    public void TestHangulCompose() throws Exception {
        logln("Canonical composition...");
        staticTest(NFC, hangulCanon, 2);
    }

    public void TestHangulDecomp() throws Exception {
        logln("Canonical decomposition...");
        staticTest(NFD, hangulCanon, 1);
    }

}