1 /*
  2  * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  */
 23 /*
 24  * @test
 25  * @bug  4221795 8032446 8174270
 26  * @summary Confirm Normalizer's fundamental behavior. Imported from ICU4J 3.2's
 27  * src/com/ibm/icu/dev/test and modified.
 28  * @modules java.base/sun.text java.base/jdk.internal.icu.text
 29  * @compile -XDignore.symbol.file ICUBasicTest.java
 30  * @run junit/timeout=30 ICUBasicTest
 31  */
 32 
 33 /*
 34  *******************************************************************************
 35  * Copyright (C) 1996-2004, International Business Machines Corporation and    *
 36  * others. All Rights Reserved.                                                *
 37  *******************************************************************************
 38  */
 39 
 40 import sun.text.Normalizer;
 41 import jdk.internal.icu.text.NormalizerBase;
 42 
 43 import java.util.HexFormat;
 44 
 45 import static java.text.Normalizer.Form.*;
 46 
 47 import org.junit.jupiter.api.Test;
 48 
 49 import static org.junit.jupiter.api.Assertions.fail;
 50 
 51 public class ICUBasicTest {
 52 
 53     /*
 54      * Normalization modes
 55      */
 56     private static final NormalizerBase.Mode NFCmode  = NormalizerBase.NFC;
 57     private static final NormalizerBase.Mode NFDmode  = NormalizerBase.NFD;
 58     private static final NormalizerBase.Mode NFKCmode = NormalizerBase.NFKC;
 59     private static final NormalizerBase.Mode NFKDmode = NormalizerBase.NFKD;
 60     private static final NormalizerBase.Mode NONEmode = NormalizerBase.NONE;
 61 
 62     /*
 63      * Normalization options
 64      */
 65 
 66     /* Normal Unicode versions */
 67     private static final int UNICODE_3_2_0  = Normalizer.UNICODE_3_2;
 68     private static final int UNICODE_LATEST = NormalizerBase.UNICODE_LATEST;
 69 
 70     /*
 71      * Special cases for UAX #15 bug
 72      * see Unicode Public Review Issue #29
 73      * at http://www.unicode.org/review/resolved-pri.html#pri29
 74      *
 75      * Note:
 76      *   PRI #29 is supported in Unicode 4.1.0. Therefore, expected results are
 77      *   different for earlier Unicode versions.
 78      */
 79     @Test
 80     public void TestComposition() {
 81 
 82         final TestCompositionCase cases[] = new TestCompositionCase[] {
 83             new TestCompositionCase(NFC, UNICODE_3_2_0,
 84                 "\u1100\u0300\u1161\u0327",
 85                 "\u1100\u0300\u1161\u0327"),
 86             new TestCompositionCase(NFC, UNICODE_LATEST,
 87                 "\u1100\u0300\u1161\u0327",
 88                 "\u1100\u0300\u1161\u0327"),
 89 
 90             new TestCompositionCase(NFC, UNICODE_3_2_0,
 91                 "\u1100\u0300\u1161\u0327\u11a8",
 92                 "\u1100\u0300\u1161\u0327\u11a8"),
 93             new TestCompositionCase(NFC, UNICODE_LATEST,
 94                 "\u1100\u0300\u1161\u0327\u11a8",
 95                 "\u1100\u0300\u1161\u0327\u11a8"),
 96 
 97             new TestCompositionCase(NFC, UNICODE_3_2_0,
 98                 "\uac00\u0300\u0327\u11a8",
 99                 "\uac00\u0327\u0300\u11a8"),
100             new TestCompositionCase(NFC, UNICODE_LATEST,
101                 "\uac00\u0300\u0327\u11a8",
102                 "\uac00\u0327\u0300\u11a8"),
103 
104             new TestCompositionCase(NFC, UNICODE_3_2_0,
105                 "\u0b47\u0300\u0b3e",
106                 "\u0b47\u0300\u0b3e"),
107             new TestCompositionCase(NFC, UNICODE_LATEST,
108                 "\u0b47\u0300\u0b3e",
109                 "\u0b47\u0300\u0b3e"),
110         };
111 
112         String output;
113         int i, length;
114 
115         for (i=0; i<cases.length; ++i) {
116             output = Normalizer.normalize(cases[i].input,
117                                           cases[i].form, cases[i].options);
118             if (!output.equals(cases[i].expect)) {
119                 fail("unexpected result for case " + i + ". Expected="
120                       + cases[i].expect + ", Actual=" + output);
121             }
122         }
123     }
124 
125     private final static class TestCompositionCase {
126         public java.text.Normalizer.Form form;
127         public int options;
128         public String input, expect;
129 
130         TestCompositionCase(java.text.Normalizer.Form form,
131                             int options,
132                             String input,
133                             String expect) {
134             this.form    = form;
135             this.options = options;
136             this.input   = input;
137             this.expect  = expect;
138         }
139     }
140 
141     /*
142      * Added in order to detect a regression.
143      */
144     @Test
145     public void TestCombiningMarks() {
146         String src      = "\u0f71\u0f72\u0f73\u0f74\u0f75";
147         String expected = "\u0F71\u0F71\u0F71\u0F72\u0F72\u0F74\u0F74";
148         String result   = NormalizerBase.normalize(src, NFD);
149 
150         if (!expected.equals(result)) {
151             fail("Reordering of combining marks failed. Expected: " +
152                   HexFormat.of().withDelimiter(" ").formatHex(expected.getBytes())
153                     + " Got: "+ HexFormat.of().withDelimiter(" ").formatHex(result.getBytes()));
154         }
155     }
156 
157     /*
158      * Added in order to detect a regression.
159      */
160     @Test
161     public void TestBengali() throws Exception {
162         String input = "\u09bc\u09be\u09cd\u09be";
163         String output=NormalizerBase.normalize(input, NFC);
164 
165         if (!input.equals(output)) {
166              fail("ERROR in NFC of string");
167         }
168         return;
169     }
170 
171 
172     /*
173      * Added in order to detect a regression.
174      */
175     /**
176      * Test for a problem found by Verisign.  Problem is that
177      * characters at the start of a string are not put in canonical
178      * order correctly by compose() if there is no starter.
179      */
180     @Test
181     public void TestVerisign() throws Exception {
182         String[] inputs = {
183             "\u05b8\u05b9\u05b1\u0591\u05c3\u05b0\u05ac\u059f",
184             "\u0592\u05b7\u05bc\u05a5\u05b0\u05c0\u05c4\u05ad"
185         };
186         String[] outputs = {
187             "\u05b1\u05b8\u05b9\u0591\u05c3\u05b0\u05ac\u059f",
188             "\u05b0\u05b7\u05bc\u05a5\u0592\u05c0\u05ad\u05c4"
189         };
190 
191         for (int i = 0; i < inputs.length; ++i) {
192             String input = inputs[i];
193             String output = outputs[i];
194 
195             String result = NormalizerBase.normalize(input, NFD);
196             if (!result.equals(output)) {
197                 fail("FAIL input: " + HexFormat.of().withDelimiter(" ")
198                         .formatHex(input.getBytes()) + "\n" +
199                       " decompose: " + HexFormat.of().withDelimiter(" ")
200                         .formatHex(result.getBytes()) + "\n" +
201                       "  expected: " + HexFormat.of().withDelimiter(" ")
202                         .formatHex(output.getBytes()));
203             }
204 
205             result = NormalizerBase.normalize(input, NFC);
206             if (!result.equals(output)) {
207                 fail("FAIL input: " + HexFormat.of().withDelimiter(" ")
208                         .formatHex(input.getBytes()) + "\n" +
209                       "   compose: " + HexFormat.of().withDelimiter(" ")
210                         .formatHex(output.getBytes()) + "\n" +
211                       "  expected: " + HexFormat.of().withDelimiter(" ")
212                         .formatHex(output.getBytes()));
213             }
214         }
215     }
216 
217     /**
218      * Test for a problem that showed up just before ICU 1.6 release
219      * having to do with combining characters with an index of zero.
220      * Such characters do not participate in any canonical
221      * decompositions.  However, having an index of zero means that
222      * they all share one typeMask[] entry, that is, they all have to
223      * map to the same canonical class, which is not the case, in
224      * reality.
225      */
226     @Test
227     public void TestZeroIndex() throws Exception {
228         String[] DATA = {
229             // Expect col1 x COMPOSE_COMPAT => col2
230             // Expect col2 x DECOMP => col3
231             "A\u0316\u0300", "\u00C0\u0316", "A\u0316\u0300",
232             "A\u0300\u0316", "\u00C0\u0316", "A\u0316\u0300",
233             "A\u0327\u0300", "\u00C0\u0327", "A\u0327\u0300",
234             "c\u0321\u0327", "c\u0321\u0327", "c\u0321\u0327",
235             "c\u0327\u0321", "\u00E7\u0321", "c\u0327\u0321",
236         };
237 
238         for (int i=0; i<DATA.length; i+=3) {
239             String a = DATA[i];
240             String b = NormalizerBase.normalize(a, NFKC);
241             String exp = DATA[i+1];
242 
243             if (b.equals(exp)) {
244                 System.out.println("Ok: " + HexFormat.of().withDelimiter(" ")
245                         .formatHex(a.getBytes()) + " x COMPOSE_COMPAT => " +
246                       HexFormat.of().withDelimiter(" ")
247                               .formatHex(b.getBytes()));
248             } else {
249                 fail("FAIL: " + HexFormat.of().withDelimiter(" ")
250                         .formatHex(b.getBytes()) + " x COMPOSE_COMPAT => " +
251                       HexFormat.of().withDelimiter(" ")
252                               .formatHex(a.getBytes()) + ", expect " +
253                         HexFormat.of().withDelimiter(" ")
254                                 .formatHex(exp.getBytes()));
255             }
256 
257             a = NormalizerBase.normalize(b, NFD);
258             exp = DATA[i+2];
259             if (a.equals(exp)) {
260                 System.out.println("Ok: " + HexFormat.of().withDelimiter(" ").formatHex(b.getBytes()) + " x DECOMP => " +
261                       HexFormat.of().withDelimiter(" ").formatHex(a.getBytes()));
262             } else {
263                 fail("FAIL: " + HexFormat.of().withDelimiter(" ").formatHex(b.getBytes()) + " x DECOMP => " +
264                       HexFormat.of().withDelimiter(" ").formatHex(a.getBytes()) + ", expect " + HexFormat.of().withDelimiter(" ").formatHex(exp.getBytes()));
265             }
266         }
267     }
268 
269     /**
270      * Make sure characters in the CompositionExclusion.txt list do not get
271      * composed to.
272      */
273     @Test
274     public void TestCompositionExclusion() throws Exception {
275         // This list is generated from CompositionExclusion.txt.
276         // Update whenever the normalizer tables are updated.  Note
277         // that we test all characters listed, even those that can be
278         // derived from the Unicode DB and are therefore commented
279         // out.
280 
281         /*
282          * kyuka's note:
283          *   Original data seemed to be based on Unicode 3.0.0(the initial
284          *   Composition Exclusions list) and seemed to have some mistakes.
285          *   Updated in order to correct mistakes and to support Unicode 4.0.0.
286          *   And, this table can be used also for Unicode 3.2.0.
287          */
288         String[][] EXCLUDED_UNICODE_3_2_0 = {
289             {"\u0340"},
290             {"\u0341"},
291             {"\u0343"},
292             {"\u0344"},
293             {"\u0374"},
294             {"\u037E"},
295             {"\u0387"},
296             {"\u0958"},
297             {"\u0959", "\u095F"},
298             {"\u09DC"},
299             {"\u09DD"},
300             {"\u09DF"},
301             {"\u0A33"},
302             {"\u0A36"},
303             {"\u0A59", "\u0A5B"},
304             {"\u0A5E"},
305             {"\u0B5C"},
306             {"\u0B5D"},
307             {"\u0F43"},
308             {"\u0F4D"},
309             {"\u0F52"},
310             {"\u0F57"},
311             {"\u0F5C"},
312             {"\u0F69"},
313             {"\u0F73"},
314             {"\u0F75"},
315             {"\u0F76"},
316             {"\u0F78"},
317             {"\u0F81"},
318             {"\u0F93"},
319             {"\u0F9D"},
320             {"\u0FA2"},
321             {"\u0FA7"},
322             {"\u0FAC"},
323             {"\u0FB9"},
324             {"\u1F71"},
325             {"\u1F73"},
326             {"\u1F75"},
327             {"\u1F77"},
328             {"\u1F79"},
329             {"\u1F7B"},
330             {"\u1F7D"},
331             {"\u1FBB"},
332             {"\u1FBE"},
333             {"\u1FC9"},
334             {"\u1FCB"},
335             {"\u1FD3"},
336             {"\u1FDB"},
337             {"\u1FE3"},
338             {"\u1FEB"},
339             {"\u1FEE"},
340             {"\u1FEF"},
341             {"\u1FF9"},
342             {"\u1FFB"},
343             {"\u1FFD"},
344             {"\u2000"},
345             {"\u2001"},
346             {"\u2126"},
347             {"\u212A"},
348             {"\u212B"},
349             {"\u2329"},
350             {"\u232A"},
351             {"\u2ADC"},
352             {"\uF900", "\uFA0D"},
353             {"\uFA10"},
354             {"\uFA12"},
355             {"\uFA15", "\uFA1E"},
356             {"\uFA20"},
357             {"\uFA22"},
358             {"\uFA25"},
359             {"\uFA26"},
360             {"\uFA2A", "\uFA2D"},
361             {"\uFA30", "\uFA6A"},
362             {"\uFB1D"},
363             {"\uFB1F"},
364             {"\uFB2A", "\uFB36"},
365             {"\uFB38", "\uFB3C"},
366             {"\uFB3E"},
367             {"\uFB40"},
368             {"\uFB41"},
369             {"\uFB43"},
370             {"\uFB44"},
371             {"\uFB46", "\uFB4E"},
372             {"\uD834\uDD5E", "\uD834\uDD64"},
373             {"\uD834\uDDBB", "\uD834\uDDC0"},
374             {"\uD87E\uDC00", "\uD87E\uDE1D"}
375         };
376 
377         String[][] EXCLUDED_LATEST = {
378 
379         };
380 
381         for (int i = 0; i < EXCLUDED_UNICODE_3_2_0.length; ++i) {
382             if (EXCLUDED_UNICODE_3_2_0[i].length == 1) {
383                 checkCompositionExclusion_320(EXCLUDED_UNICODE_3_2_0[i][0]);
384             } else {
385                 int from, to;
386                 from = Character.codePointAt(EXCLUDED_UNICODE_3_2_0[i][0], 0);
387                 to   = Character.codePointAt(EXCLUDED_UNICODE_3_2_0[i][1], 0);
388 
389                 for (int j = from; j <= to; j++) {
390                     checkCompositionExclusion_320(String.valueOf(Character.toChars(j)));
391                 }
392             }
393         }
394     }
395 
396     private void checkCompositionExclusion_320(String s) throws Exception {
397         String a = String.valueOf(s);
398         String b = NormalizerBase.normalize(a, NFKD);
399         String c = NormalizerBase.normalize(b, NFC);
400 
401         if (c.equals(a)) {
402             fail("FAIL: " + HexFormat.of().withDelimiter(" ")
403                     .formatHex(a.getBytes()) + " x DECOMP_COMPAT => " +
404                   HexFormat.of().withDelimiter(" ")
405                           .formatHex(b.getBytes()) + " x COMPOSE => " +
406                   HexFormat.of().withDelimiter(" ")
407                           .formatHex(c.getBytes()) + " for the latest Unicode");
408         }
409 
410         b = NormalizerBase.normalize(a, NFKD, Normalizer.UNICODE_3_2);
411         c = NormalizerBase.normalize(b, NFC, Normalizer.UNICODE_3_2);
412         if (c.equals(a)) {
413             fail("FAIL: " + HexFormat.of().withDelimiter(" ")
414                     .formatHex(a.getBytes()) + " x DECOMP_COMPAT => " +
415                   HexFormat.of().withDelimiter(" ").formatHex(b.getBytes()) + " x COMPOSE => " +
416                   HexFormat.of().withDelimiter(" ").formatHex(c.getBytes()) + " for Unicode 3.2.0");
417         }
418     }
419 
420     @Test
421     public void TestTibetan() throws Exception {
422         String[][] decomp = {
423             { "\u0f77", "\u0f77", "\u0fb2\u0f71\u0f80" }
424         };
425         String[][] compose = {
426             { "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80" }
427         };
428 
429         staticTest(NFD, decomp, 1);
430         staticTest(NFKD,decomp, 2);
431         staticTest(NFC, compose, 1);
432         staticTest(NFKC,compose, 2);
433     }
434 
435     @Test
436     public void TestExplodingBase() throws Exception{
437         // \u017f - Latin small letter long s
438         // \u0307 - combining dot above
439         // \u1e61 - Latin small letter s with dot above
440         // \u1e9b - Latin small letter long s with dot above
441         String[][] canon = {
442             // Input                Decomposed              Composed
443             { "Tschu\u017f",        "Tschu\u017f",          "Tschu\u017f"    },
444             { "Tschu\u1e9b",        "Tschu\u017f\u0307",    "Tschu\u1e9b"    },
445         };
446         String[][] compat = {
447             // Input                Decomposed              Composed
448             { "\u017f",             "s",                    "s"           },
449             { "\u1e9b",             "s\u0307",              "\u1e61"      },
450         };
451 
452         staticTest(NFD, canon,  1);
453         staticTest(NFC, canon,  2);
454         staticTest(NFKD, compat, 1);
455         staticTest(NFKC, compat, 2);
456     }
457 
    // Canonical (NFD/NFC) test table. Each row is:
    //   { input, expected NFD (column 1), expected NFC (column 2) }.
    private String[][] canonTests = {
        // Input                Decomposed              Composed

        { "cat",                "cat",                  "cat"               },
        { "\u00e0ardvark",      "a\u0300ardvark",       "\u00e0ardvark",    },

        // D-dot_above
        { "\u1e0a",             "D\u0307",              "\u1e0a"            },

        // D dot_above
        { "D\u0307",            "D\u0307",              "\u1e0a"            },

        // D-dot_below dot_above
        { "\u1e0c\u0307",       "D\u0323\u0307",        "\u1e0c\u0307"      },

        // D-dot_above dot_below
        { "\u1e0a\u0323",       "D\u0323\u0307",        "\u1e0c\u0307"      },

        // D dot_below dot_above
        { "D\u0307\u0323",      "D\u0323\u0307",        "\u1e0c\u0307"      },

        // D dot_below cedilla dot_above
        { "\u1e10\u0307\u0323", "D\u0327\u0323\u0307",  "\u1e10\u0323\u0307"},

        // D dot_above ogonek dot_below
        { "D\u0307\u0328\u0323","D\u0328\u0323\u0307",  "\u1e0c\u0328\u0307"},

        // E-macron-grave
        { "\u1E14",             "E\u0304\u0300",        "\u1E14"            },

        // E-macron + grave
        { "\u0112\u0300",       "E\u0304\u0300",        "\u1E14"            },

        // E-grave + macron
        { "\u00c8\u0304",       "E\u0300\u0304",        "\u00c8\u0304"      },

        // angstrom_sign
        { "\u212b",             "A\u030a",              "\u00c5"            },

        // A-ring
        { "\u00c5",             "A\u030a",              "\u00c5"            },
        { "\u00c4ffin",         "A\u0308ffin",          "\u00c4ffin"        },
        { "\u00c4\uFB03n",      "A\u0308\uFB03n",       "\u00c4\uFB03n"     },

        //updated with 3.0
        { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        },
        { "\u00fd\uFB03n",      "y\u0301\uFB03n",       "\u00fd\uFB03n"     },

        { "Henry IV",           "Henry IV",             "Henry IV"          },
        { "Henry \u2163",       "Henry \u2163",         "Henry \u2163"      },

        // ga(Zenkaku-Katakana)
        { "\u30AC",             "\u30AB\u3099",         "\u30AC"            },

        // ka(Zenkaku-Katakana) + ten(Zenkaku)
        { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            },

        // ka(Hankaku-Katakana) + ten(Hankaku-Katakana)
        { "\uFF76\uFF9E",       "\uFF76\uFF9E",         "\uFF76\uFF9E"      },

        // ka(Zenkaku-Katakana) + ten(Hankaku)
        { "\u30AB\uFF9E",       "\u30AB\uFF9E",         "\u30AB\uFF9E"      },
        // ka(Hankaku-Katakana) + ten(Zenkaku)
        { "\uFF76\u3099",       "\uFF76\u3099",         "\uFF76\u3099"      },

        // A + grave + combining grave below: combining marks reorder by class
        { "A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" },

        // Supplementary-plane (surrogate-pair) musical symbols
        { "\ud834\udd5e\ud834\udd57\ud834\udd65\ud834\udd5e",
          "\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65",
          "\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65\ud834\udd57\ud834\udd65" },
    };
529 
    // Compatibility (NFKD/NFKC) test table. Each row is:
    //   { input, expected NFKD (column 1), expected NFKC (column 2) }.
    private String[][] compatTests = {
        // Input                Decomposed              Composed

        { "cat",                 "cat",                     "cat"           },

        // Alef-Lamed vs. Alef, Lamed
        { "\uFB4f",             "\u05D0\u05DC",         "\u05D0\u05DC",     },

        { "\u00C4ffin",         "A\u0308ffin",          "\u00C4ffin"        },

        // ffi ligature -> f + f + i
        { "\u00C4\uFB03n",      "A\u0308ffin",          "\u00C4ffin"        },

        //updated for 3.0
        { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        },

        // ffi ligature -> f + f + i
        { "\u00fd\uFB03n",      "y\u0301ffin",          "\u00fdffin"        },

        { "Henry IV",           "Henry IV",             "Henry IV"          },
        { "Henry \u2163",       "Henry IV",             "Henry IV"          },

        // ga(Zenkaku-Katakana)
        { "\u30AC",             "\u30AB\u3099",         "\u30AC"            },

        // ka(Zenkaku-Katakana) + ten(Zenkaku)
        { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            },

        // ka(Hankaku-Katakana) + ten(Zenkaku)
        { "\uFF76\u3099",       "\u30AB\u3099",         "\u30AC"            },

        /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
        // ka(Hankaku-Katakana) + ten(Hankaku)
        { "\uFF76\uFF9E",       "\u30AB\u3099",         "\u30AC"            },

        // ka(Zenkaku-Katakana) + ten(Hankaku)
        { "\u30AB\uFF9E",       "\u30AB\u3099",         "\u30AC"            },
    };
568 
    // NFD over the canonical table: column 1 holds the expected decomposition.
    @Test
    public void TestNFD() throws Exception{
        staticTest(NFD, canonTests, 1);
    }

    // NFC over the canonical table: column 2 holds the expected composition.
    @Test
    public void TestNFC() throws Exception{
        staticTest(NFC, canonTests, 2);
    }

    // NFKD over the compatibility table: column 1 is the expected result.
    @Test
    public void TestNFKD() throws Exception{
        staticTest(NFKD, compatTests, 1);
    }

    // NFKC over the compatibility table: column 2 is the expected result.
    @Test
    public void TestNFKC() throws Exception{
        staticTest(NFKC, compatTests, 2);
    }
588 
589     private void staticTest(java.text.Normalizer.Form form,
590                             String[][] tests,
591                             int outCol) throws Exception {
592         for (int i = 0; i < tests.length; i++) {
593             String input = tests[i][0];
594             System.out.println("Normalizing '" + input + "' (" + HexFormat.of()
595                     .withDelimiter(" ").formatHex(input.getBytes()) + ")" );
596 
597             String expect =tests[i][outCol];
598             String output = java.text.Normalizer.normalize(input, form);
599 
600             if (!output.equals(expect)) {
601                 fail("FAIL: case " + i
602                     + " expected '" + expect + "' (" + HexFormat.of()
603                         .withDelimiter(" ").formatHex(expect.getBytes()) + ")"
604                     + " but got '" + output + "' (" + HexFormat.of()
605                         .withDelimiter(" ").formatHex(output.getBytes()) + ")"
606 );
607             }
608         }
609     }
610 
    // With Canonical decomposition, Hangul syllables should get decomposed
    // into Jamo, but Jamo characters should not be decomposed into
    // conjoining Jamo
    // Rows are { input, expected NFD (column 1), expected NFC (column 2) }.
    private String[][] hangulCanon = {
        // Input                Decomposed              Composed
        { "\ud4db",             "\u1111\u1171\u11b6",   "\ud4db"        },
        { "\u1111\u1171\u11b6", "\u1111\u1171\u11b6",   "\ud4db"        },
    };
619 
620     @Test
621     public void TestHangulCompose() throws Exception{
622         System.out.println("Canonical composition...");
623         staticTest(NFC, hangulCanon,  2);
624      }
625 
626     @Test
627     public void TestHangulDecomp() throws Exception{
628         System.out.println("Canonical decomposition...");
629         staticTest(NFD, hangulCanon, 1);
630     }
631 
632 }