< prev index next >

test/jdk/java/text/Normalizer/ICUBasicTest.java

Print this page
*** 1,7 ***
  /*
!  * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.
--- 1,7 ---
  /*
!  * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.

*** 24,13 ***
   * @test
   * @bug  4221795 8032446 8174270
   * @summary Confirm Normalizer's fundamental behavior. Imported from ICU4J 3.2's
   * src/com/ibm/icu/dev/test and modified.
   * @modules java.base/sun.text java.base/jdk.internal.icu.text
-  * @library /java/text/testlib
   * @compile -XDignore.symbol.file ICUBasicTest.java
!  * @run main/timeout=30 ICUBasicTest
   */
  
  /*
   *******************************************************************************
   * Copyright (C) 1996-2004, International Business Machines Corporation and    *
--- 24,12 ---
   * @test
   * @bug  4221795 8032446 8174270
   * @summary Confirm Normalizer's fundamental behavior. Imported from ICU4J 3.2's
   * src/com/ibm/icu/dev/test and modified.
   * @modules java.base/sun.text java.base/jdk.internal.icu.text
   * @compile -XDignore.symbol.file ICUBasicTest.java
!  * @run junit/timeout=30 ICUBasicTest
   */
  
  /*
   *******************************************************************************
   * Copyright (C) 1996-2004, International Business Machines Corporation and    *

*** 43,15 ***
  
  import java.util.HexFormat;
  
  import static java.text.Normalizer.Form.*;
  
! public class ICUBasicTest extends IntlTest {
  
!     public static void main(String[] args) throws Exception {
!         new ICUBasicTest().run(args);
!     }
  
      /*
       * Normalization modes
       */
      private static final NormalizerBase.Mode NFCmode  = NormalizerBase.NFC;
--- 42,15 ---
  
  import java.util.HexFormat;
  
  import static java.text.Normalizer.Form.*;
  
! import org.junit.jupiter.api.Test;
  
! import static org.junit.jupiter.api.Assertions.fail;
! 
! public class ICUBasicTest {
  
      /*
       * Normalization modes
       */
      private static final NormalizerBase.Mode NFCmode  = NormalizerBase.NFC;

*** 75,10 ***
--- 74,11 ---
       *
       * Note:
       *   PRI #29 is supported in Unicode 4.1.0. Therefore, expected results are
       *   different for earlier Unicode versions.
       */
+     @Test
      public void TestComposition() {
  
          final TestCompositionCase cases[] = new TestCompositionCase[] {
              new TestCompositionCase(NFC, UNICODE_3_2_0,
                  "\u1100\u0300\u1161\u0327",

*** 114,14 ***
  
          for (i=0; i<cases.length; ++i) {
              output = Normalizer.normalize(cases[i].input,
                                            cases[i].form, cases[i].options);
              if (!output.equals(cases[i].expect)) {
!                 errln("unexpected result for case " + i + ". Expected="
-                       + cases[i].expect + ", Actual=" + output);
-             } else if (verbose) {
-                 logln("expected result for case " + i + ". Expected="
                        + cases[i].expect + ", Actual=" + output);
              }
          }
      }
  
--- 114,11 ---
  
          for (i=0; i<cases.length; ++i) {
              output = Normalizer.normalize(cases[i].input,
                                            cases[i].form, cases[i].options);
              if (!output.equals(cases[i].expect)) {
!                 fail("unexpected result for case " + i + ". Expected="
                        + cases[i].expect + ", Actual=" + output);
              }
          }
      }
  

*** 142,31 ***
      }
  
      /*
       * Added in order to detect a regression.
       */
      public void TestCombiningMarks() {
          String src      = "\u0f71\u0f72\u0f73\u0f74\u0f75";
          String expected = "\u0F71\u0F71\u0F71\u0F72\u0F72\u0F74\u0F74";
          String result   = NormalizerBase.normalize(src, NFD);
  
          if (!expected.equals(result)) {
!             errln("Reordering of combining marks failed. Expected: " +
                    HexFormat.of().withDelimiter(" ").formatHex(expected.getBytes())
                      + " Got: "+ HexFormat.of().withDelimiter(" ").formatHex(result.getBytes()));
          }
      }
  
      /*
       * Added in order to detect a regression.
       */
      public void TestBengali() throws Exception {
          String input = "\u09bc\u09be\u09cd\u09be";
          String output=NormalizerBase.normalize(input, NFC);
  
          if (!input.equals(output)) {
!              errln("ERROR in NFC of string");
          }
          return;
      }
  
  
--- 139,33 ---
      }
  
      /*
       * Added in order to detect a regression.
       */
+     @Test
      public void TestCombiningMarks() {
          String src      = "\u0f71\u0f72\u0f73\u0f74\u0f75";
          String expected = "\u0F71\u0F71\u0F71\u0F72\u0F72\u0F74\u0F74";
          String result   = NormalizerBase.normalize(src, NFD);
  
          if (!expected.equals(result)) {
!             fail("Reordering of combining marks failed. Expected: " +
                    HexFormat.of().withDelimiter(" ").formatHex(expected.getBytes())
                      + " Got: "+ HexFormat.of().withDelimiter(" ").formatHex(result.getBytes()));
          }
      }
  
      /*
       * Added in order to detect a regression.
       */
+     @Test
      public void TestBengali() throws Exception {
          String input = "\u09bc\u09be\u09cd\u09be";
          String output=NormalizerBase.normalize(input, NFC);
  
          if (!input.equals(output)) {
!              fail("ERROR in NFC of string");
          }
          return;
      }
  
  

*** 176,10 ***
--- 175,11 ---
      /**
       * Test for a problem found by Verisign.  Problem is that
       * characters at the start of a string are not put in canonical
       * order correctly by compose() if there is no starter.
       */
+     @Test
      public void TestVerisign() throws Exception {
          String[] inputs = {
              "\u05b8\u05b9\u05b1\u0591\u05c3\u05b0\u05ac\u059f",
              "\u0592\u05b7\u05bc\u05a5\u05b0\u05c0\u05c4\u05ad"
          };

*** 192,21 ***
              String input = inputs[i];
              String output = outputs[i];
  
              String result = NormalizerBase.normalize(input, NFD);
              if (!result.equals(output)) {
!                 errln("FAIL input: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(input.getBytes()) + "\n" +
                        " decompose: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(result.getBytes()) + "\n" +
                        "  expected: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(output.getBytes()));
              }
  
              result = NormalizerBase.normalize(input, NFC);
              if (!result.equals(output)) {
!                 errln("FAIL input: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(input.getBytes()) + "\n" +
                        "   compose: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(output.getBytes()) + "\n" +
                        "  expected: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(output.getBytes()));
--- 192,21 ---
              String input = inputs[i];
              String output = outputs[i];
  
              String result = NormalizerBase.normalize(input, NFD);
              if (!result.equals(output)) {
!                 fail("FAIL input: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(input.getBytes()) + "\n" +
                        " decompose: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(result.getBytes()) + "\n" +
                        "  expected: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(output.getBytes()));
              }
  
              result = NormalizerBase.normalize(input, NFC);
              if (!result.equals(output)) {
!                 fail("FAIL input: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(input.getBytes()) + "\n" +
                        "   compose: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(output.getBytes()) + "\n" +
                        "  expected: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(output.getBytes()));

*** 221,10 ***
--- 221,11 ---
       * decompositions.  However, having an index of zero means that
       * they all share one typeMask[] entry, that is, they all have to
       * map to the same canonical class, which is not the case, in
       * reality.
       */
+     @Test
      public void TestZeroIndex() throws Exception {
          String[] DATA = {
              // Expect col1 x COMPOSE_COMPAT => col2
              // Expect col2 x DECOMP => col3
              "A\u0316\u0300", "\u00C0\u0316", "A\u0316\u0300",

*** 238,39 ***
              String a = DATA[i];
              String b = NormalizerBase.normalize(a, NFKC);
              String exp = DATA[i+1];
  
              if (b.equals(exp)) {
!                 logln("Ok: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(a.getBytes()) + " x COMPOSE_COMPAT => " +
                        HexFormat.of().withDelimiter(" ")
                                .formatHex(b.getBytes()));
              } else {
!                 errln("FAIL: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(b.getBytes()) + " x COMPOSE_COMPAT => " +
                        HexFormat.of().withDelimiter(" ")
                                .formatHex(a.getBytes()) + ", expect " +
                          HexFormat.of().withDelimiter(" ")
                                  .formatHex(exp.getBytes()));
              }
  
              a = NormalizerBase.normalize(b, NFD);
              exp = DATA[i+2];
              if (a.equals(exp)) {
!                 logln("Ok: " + HexFormat.of().withDelimiter(" ").formatHex(b.getBytes()) + " x DECOMP => " +
                        HexFormat.of().withDelimiter(" ").formatHex(a.getBytes()));
              } else {
!                 errln("FAIL: " + HexFormat.of().withDelimiter(" ").formatHex(b.getBytes()) + " x DECOMP => " +
                        HexFormat.of().withDelimiter(" ").formatHex(a.getBytes()) + ", expect " + HexFormat.of().withDelimiter(" ").formatHex(exp.getBytes()));
              }
          }
      }
  
      /**
       * Make sure characters in the CompositionExclusion.txt list do not get
       * composed to.
       */
      public void TestCompositionExclusion() throws Exception {
          // This list is generated from CompositionExclusion.txt.
          // Update whenever the normalizer tables are updated.  Note
          // that we test all characters listed, even those that can be
          // derived from the Unicode DB and are therefore commented
--- 239,40 ---
              String a = DATA[i];
              String b = NormalizerBase.normalize(a, NFKC);
              String exp = DATA[i+1];
  
              if (b.equals(exp)) {
!                 System.out.println("Ok: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(a.getBytes()) + " x COMPOSE_COMPAT => " +
                        HexFormat.of().withDelimiter(" ")
                                .formatHex(b.getBytes()));
              } else {
!                 fail("FAIL: " + HexFormat.of().withDelimiter(" ")
                          .formatHex(b.getBytes()) + " x COMPOSE_COMPAT => " +
                        HexFormat.of().withDelimiter(" ")
                                .formatHex(a.getBytes()) + ", expect " +
                          HexFormat.of().withDelimiter(" ")
                                  .formatHex(exp.getBytes()));
              }
  
              a = NormalizerBase.normalize(b, NFD);
              exp = DATA[i+2];
              if (a.equals(exp)) {
!                 System.out.println("Ok: " + HexFormat.of().withDelimiter(" ").formatHex(b.getBytes()) + " x DECOMP => " +
                        HexFormat.of().withDelimiter(" ").formatHex(a.getBytes()));
              } else {
!                 fail("FAIL: " + HexFormat.of().withDelimiter(" ").formatHex(b.getBytes()) + " x DECOMP => " +
                        HexFormat.of().withDelimiter(" ").formatHex(a.getBytes()) + ", expect " + HexFormat.of().withDelimiter(" ").formatHex(exp.getBytes()));
              }
          }
      }
  
      /**
       * Make sure characters in the CompositionExclusion.txt list do not get
       * composed to.
       */
+     @Test
      public void TestCompositionExclusion() throws Exception {
          // This list is generated from CompositionExclusion.txt.
          // Update whenever the normalizer tables are updated.  Note
          // that we test all characters listed, even those that can be
          // derived from the Unicode DB and are therefore commented

*** 395,40 ***
          String a = String.valueOf(s);
          String b = NormalizerBase.normalize(a, NFKD);
          String c = NormalizerBase.normalize(b, NFC);
  
          if (c.equals(a)) {
!             errln("FAIL: " + HexFormat.of().withDelimiter(" ")
-                     .formatHex(a.getBytes()) + " x DECOMP_COMPAT => " +
-                   HexFormat.of().withDelimiter(" ")
-                           .formatHex(b.getBytes()) + " x COMPOSE => " +
-                   HexFormat.of().withDelimiter(" ")
-                           .formatHex(c.getBytes()) + " for the latest Unicode");
-         } else if (verbose) {
-             logln("Ok: " + HexFormat.of().withDelimiter(" ")
                      .formatHex(a.getBytes()) + " x DECOMP_COMPAT => " +
                    HexFormat.of().withDelimiter(" ")
                            .formatHex(b.getBytes()) + " x COMPOSE => " +
                    HexFormat.of().withDelimiter(" ")
                            .formatHex(c.getBytes()) + " for the latest Unicode");
          }
  
          b = NormalizerBase.normalize(a, NFKD, Normalizer.UNICODE_3_2);
          c = NormalizerBase.normalize(b, NFC, Normalizer.UNICODE_3_2);
          if (c.equals(a)) {
!             errln("FAIL: " + HexFormat.of().withDelimiter(" ")
-                     .formatHex(a.getBytes()) + " x DECOMP_COMPAT => " +
-                   HexFormat.of().withDelimiter(" ").formatHex(b.getBytes()) + " x COMPOSE => " +
-                   HexFormat.of().withDelimiter(" ").formatHex(c.getBytes()) + " for Unicode 3.2.0");
-         } else if (verbose) {
-             logln("Ok: " + HexFormat.of().withDelimiter(" ")
                      .formatHex(a.getBytes()) + " x DECOMP_COMPAT => " +
                    HexFormat.of().withDelimiter(" ").formatHex(b.getBytes()) + " x COMPOSE => " +
                    HexFormat.of().withDelimiter(" ").formatHex(c.getBytes()) + " for Unicode 3.2.0");
          }
      }
  
      public void TestTibetan() throws Exception {
          String[][] decomp = {
              { "\u0f77", "\u0f77", "\u0fb2\u0f71\u0f80" }
          };
          String[][] compose = {
--- 397,29 ---
          String a = String.valueOf(s);
          String b = NormalizerBase.normalize(a, NFKD);
          String c = NormalizerBase.normalize(b, NFC);
  
          if (c.equals(a)) {
!             fail("FAIL: " + HexFormat.of().withDelimiter(" ")
                      .formatHex(a.getBytes()) + " x DECOMP_COMPAT => " +
                    HexFormat.of().withDelimiter(" ")
                            .formatHex(b.getBytes()) + " x COMPOSE => " +
                    HexFormat.of().withDelimiter(" ")
                            .formatHex(c.getBytes()) + " for the latest Unicode");
          }
  
          b = NormalizerBase.normalize(a, NFKD, Normalizer.UNICODE_3_2);
          c = NormalizerBase.normalize(b, NFC, Normalizer.UNICODE_3_2);
          if (c.equals(a)) {
!             fail("FAIL: " + HexFormat.of().withDelimiter(" ")
                      .formatHex(a.getBytes()) + " x DECOMP_COMPAT => " +
                    HexFormat.of().withDelimiter(" ").formatHex(b.getBytes()) + " x COMPOSE => " +
                    HexFormat.of().withDelimiter(" ").formatHex(c.getBytes()) + " for Unicode 3.2.0");
          }
      }
  
+     @Test
      public void TestTibetan() throws Exception {
          String[][] decomp = {
              { "\u0f77", "\u0f77", "\u0fb2\u0f71\u0f80" }
          };
          String[][] compose = {

*** 439,10 ***
--- 430,11 ---
          staticTest(NFKD,decomp, 2);
          staticTest(NFC, compose, 1);
          staticTest(NFKC,compose, 2);
      }
  
+     @Test
      public void TestExplodingBase() throws Exception{
          // \u017f - Latin small letter long s
          // \u0307 - combining dot above
          // \u1e61 - Latin small letter s with dot above
          // \u1e9b - Latin small letter long s with dot above

*** 572,39 ***
  
          // ka(Zenkaku-Katakana) + ten(Hankaku)
          { "\u30AB\uFF9E",       "\u30AB\u3099",         "\u30AC"            },
      };
  
      public void TestNFD() throws Exception{
          staticTest(NFD, canonTests, 1);
      }
  
      public void TestNFC() throws Exception{
          staticTest(NFC, canonTests, 2);
      }
  
      public void TestNFKD() throws Exception{
          staticTest(NFKD, compatTests, 1);
      }
  
      public void TestNFKC() throws Exception{
          staticTest(NFKC, compatTests, 2);
      }
  
      private void staticTest(java.text.Normalizer.Form form,
                              String[][] tests,
                              int outCol) throws Exception {
          for (int i = 0; i < tests.length; i++) {
              String input = tests[i][0];
!             logln("Normalizing '" + input + "' (" + HexFormat.of()
                      .withDelimiter(" ").formatHex(input.getBytes()) + ")" );
  
              String expect =tests[i][outCol];
              String output = java.text.Normalizer.normalize(input, form);
  
              if (!output.equals(expect)) {
!                 errln("FAIL: case " + i
                      + " expected '" + expect + "' (" + HexFormat.of()
                          .withDelimiter(" ").formatHex(expect.getBytes()) + ")"
                      + " but got '" + output + "' (" + HexFormat.of()
                          .withDelimiter(" ").formatHex(output.getBytes()) + ")"
  );
--- 564,43 ---
  
          // ka(Zenkaku-Katakana) + ten(Hankaku)
          { "\u30AB\uFF9E",       "\u30AB\u3099",         "\u30AC"            },
      };
  
+     @Test
      public void TestNFD() throws Exception{
          staticTest(NFD, canonTests, 1);
      }
  
+     @Test
      public void TestNFC() throws Exception{
          staticTest(NFC, canonTests, 2);
      }
  
+     @Test
      public void TestNFKD() throws Exception{
          staticTest(NFKD, compatTests, 1);
      }
  
+     @Test
      public void TestNFKC() throws Exception{
          staticTest(NFKC, compatTests, 2);
      }
  
      private void staticTest(java.text.Normalizer.Form form,
                              String[][] tests,
                              int outCol) throws Exception {
          for (int i = 0; i < tests.length; i++) {
              String input = tests[i][0];
!             System.out.println("Normalizing '" + input + "' (" + HexFormat.of()
                      .withDelimiter(" ").formatHex(input.getBytes()) + ")" );
  
              String expect =tests[i][outCol];
              String output = java.text.Normalizer.normalize(input, form);
  
              if (!output.equals(expect)) {
!                 fail("FAIL: case " + i
                      + " expected '" + expect + "' (" + HexFormat.of()
                          .withDelimiter(" ").formatHex(expect.getBytes()) + ")"
                      + " but got '" + output + "' (" + HexFormat.of()
                          .withDelimiter(" ").formatHex(output.getBytes()) + ")"
  );

*** 619,16 ***
          // Input                Decomposed              Composed
          { "\ud4db",             "\u1111\u1171\u11b6",   "\ud4db"        },
          { "\u1111\u1171\u11b6", "\u1111\u1171\u11b6",   "\ud4db"        },
      };
  
      public void TestHangulCompose() throws Exception{
!         logln("Canonical composition...");
          staticTest(NFC, hangulCanon,  2);
       }
  
      public void TestHangulDecomp() throws Exception{
!         logln("Canonical decomposition...");
          staticTest(NFD, hangulCanon, 1);
      }
  
  }
--- 615,18 ---
          // Input                Decomposed              Composed
          { "\ud4db",             "\u1111\u1171\u11b6",   "\ud4db"        },
          { "\u1111\u1171\u11b6", "\u1111\u1171\u11b6",   "\ud4db"        },
      };
  
+     @Test
      public void TestHangulCompose() throws Exception{
!         System.out.println("Canonical composition...");
          staticTest(NFC, hangulCanon,  2);
       }
  
+     @Test
      public void TestHangulDecomp() throws Exception{
!         System.out.println("Canonical decomposition...");
          staticTest(NFD, hangulCanon, 1);
      }
  
  }
< prev index next >