< prev index next >

test/jdk/java/text/Collator/Regression.java

Print this page

  1 /*
  2  * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  */
 23 
 24 /**
 25  * @test
 26  * @bug 4048446 4051866 4053636 4054238 4054734 4054736 4058613 4059820 4060154
 27  *      4062418 4065540 4066189 4066696 4076676 4078588 4079231 4081866 4087241
 28  *      4087243 4092260 4095316 4101940 4103436 4114076 4114077 4124632 4132736
 29  *      4133509 4139572 4141640 4179126 4179686 4244884 4663220
 30  * @library /java/text/testlib
 31  * @summary Regression tests for Collation and associated classes
 32  * @modules jdk.localedata

 33  */
 34 /*
 35 (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
 36 (C) Copyright IBM Corp. 1996 - All Rights Reserved
 37 
 38   The original version of this source code and documentation is copyrighted and
 39 owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are
 40 provided under terms of a License Agreement between Taligent and Sun. This
 41 technology is protected by multiple US and International patents. This notice and
 42 attribution to Taligent may not be removed.
 43   Taligent is a registered trademark of Taligent, Inc.
 44 */
 45 
 46 import java.text.*;
 47 import java.util.Locale;
 48 import java.util.Vector;
 49 

 50 
 51 public class Regression extends CollatorTest {
 52 
 53     public static void main(String[] args) throws Exception {
 54         new Regression().run(args);
 55     }
 56 
 57     // CollationElementIterator.reset() doesn't work
 58     //

 59     public void Test4048446() {
 60         CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
 61         CollationElementIterator i2 = en_us.getCollationElementIterator(test1);
 62 
 63         while ( i1.next() != CollationElementIterator.NULLORDER ) {
 64         }
 65         i1.reset();
 66 
 67         assertEqual(i1, i2);
 68     }
 69 
 70 
 71     // Collator -> rules -> Collator round-trip broken for expanding characters
 72     //

 73     public void Test4051866() throws ParseException {
 74         // Build a collator containing expanding characters
 75         RuleBasedCollator c1 = new RuleBasedCollator("< o "
 76                                                     +"& oe ,o\u3080"
 77                                                     +"& oe ,\u1530 ,O"
 78                                                     +"& OE ,O\u3080"
 79                                                     +"& OE ,\u1520"
 80                                                     +"< p ,P");
 81 
 82         // Build another using the rules from  the first
 83         RuleBasedCollator c2 = new RuleBasedCollator(c1.getRules());
 84 
 85         // Make sure they're the same
 86         if (!c1.getRules().equals(c2.getRules())) {
 87             errln("Rules are not equal");
 88         }
 89     }
 90 
 91     // Collator thinks "black-bird" == "black"
 92     //

 93     public void Test4053636() {
 94         if (en_us.equals("black-bird","black")) {
 95             errln("black-bird == black");
 96         }
 97     }
 98 
 99 
100     // CollationElementIterator will not work correctly if the associated
101     // Collator object's mode is changed
102     //

103     public void Test4054238() {
104         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
105 
106         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
107         CollationElementIterator i1 = en_us.getCollationElementIterator(test3);
108 
109         c.setDecomposition(Collator.NO_DECOMPOSITION);
110         CollationElementIterator i2 = en_us.getCollationElementIterator(test3);
111 
112         // At this point, BOTH iterators should use NO_DECOMPOSITION, since the
113         // collator itself is in that mode
114         assertEqual(i1, i2);
115     }
116 
117     // Collator.IDENTICAL documented but not implemented
118     //

119     public void Test4054734() {
120         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
121         try {
122             c.setStrength(Collator.IDENTICAL);
123         }
124         catch (Exception e) {
125             errln("Caught " + e.toString() + " setting Collator.IDENTICAL");
126         }
127 
128         String[] decomp = {
129             "\u0001",   "<",    "\u0002",
130             "\u0001",   "=",    "\u0001",
131             "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
132             "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
133         };
134         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
135         compareArray(c, decomp);
136 
137         String[] nodecomp = {
138             "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
139         };
140         c.setDecomposition(Collator.NO_DECOMPOSITION);
141         compareArray(c, nodecomp);
142     }
143 
144     // Full Decomposition mode not implemented
145     //

146     public void Test4054736() {
147         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
148         c.setDecomposition(Collator.FULL_DECOMPOSITION);
149 
150         String[] tests = {
151             "\uFB4f", "=", "\u05D0\u05DC",  // Alef-Lamed vs. Alef, Lamed
152         };
153 
154         compareArray(c, tests);
155     }
156 
 157     // Collator.getInstance() causes an ArrayIndexOutOfBoundsException for Korean
158     //

159     public void Test4058613() {
160         // Creating a default collator doesn't work when Korean is the default
161         // locale
162 
163         Locale oldDefault = Locale.getDefault();
164 
165         Locale.setDefault( Locale.KOREAN );
166         try {
167             Collator c = Collator.getInstance();
168 
169             // Since the fix to this bug was to turn of decomposition for Korean collators,
170             // ensure that's what we got
171             if (c.getDecomposition() != Collator.NO_DECOMPOSITION) {
172               errln("Decomposition is not set to NO_DECOMPOSITION");
173             }
174         }
175         finally {
176             Locale.setDefault(oldDefault);
177         }
178     }
179 
180     // RuleBasedCollator.getRules does not return the exact pattern as input
181     // for expanding character sequences
182     //

183     public void Test4059820() {
184         RuleBasedCollator c = null;
185         try {
186             c = new RuleBasedCollator("< a < b , c/a < d < z");
187         } catch (ParseException e) {
188             errln("Exception building collator: " + e.toString());
189             return;
190         }
191         if ( c.getRules().indexOf("c/a") == -1) {
192             errln("returned rules do not contain 'c/a'");
193         }
194     }
195 
196     // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
197     //

198     public void Test4060154() {
199         RuleBasedCollator c = null;
200         try {
201             c = new RuleBasedCollator("< g, G < h, H < i, I < j, J"
202                                       + " & H < \u0131, \u0130, i, I" );
203         } catch (ParseException e) {
204             errln("Exception building collator: " + e.toString());
205             return;
206         }
207         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
208 
209         String[] tertiary = {
210             "A",        "<",    "B",
211             "H",        "<",    "\u0131",
212             "H",        "<",    "I",
213             "\u0131",   "<",    "\u0130",
214             "\u0130",   "<",    "i",
215             "\u0130",   ">",    "H",
216         };
217         c.setStrength(Collator.TERTIARY);
218         compareArray(c, tertiary);
219 
220         String[] secondary = {
221             "H",        "<",    "I",
222             "\u0131",   "=",    "\u0130",
223         };
224         c.setStrength(Collator.PRIMARY);
225         compareArray(c, secondary);
226     };
227 
228     // Secondary/Tertiary comparison incorrect in French Secondary
229     //

230     public void Test4062418() throws ParseException {
231         RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
232         c.setStrength(Collator.SECONDARY);
233 
234         String[] tests = {
235                 "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
236         };
237 
238         compareArray(c, tests);
239     }
240 
241     // Collator.compare() method broken if either string contains spaces
242     //

243     public void Test4065540() {
244         if (en_us.compare("abcd e", "abcd f") == 0) {
245             errln("'abcd e' == 'abcd f'");
246         }
247     }
248 
249     // Unicode characters need to be recursively decomposed to get the
250     // correct result. For example,
251     // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
252     //

253     public void Test4066189() {
254         String test1 = "\u1EB1";
255         String test2 = "a\u0306\u0300";
256 
257         RuleBasedCollator c1 = (RuleBasedCollator) en_us.clone();
258         c1.setDecomposition(Collator.FULL_DECOMPOSITION);
259         CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
260 
261         RuleBasedCollator c2 = (RuleBasedCollator) en_us.clone();
262         c2.setDecomposition(Collator.NO_DECOMPOSITION);
263         CollationElementIterator i2 = en_us.getCollationElementIterator(test2);
264 
265         assertEqual(i1, i2);
266     }
267 
268     // French secondary collation checking at the end of compare iteration fails
269     //

270     public void Test4066696() {
271         RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
272         c.setStrength(Collator.SECONDARY);
273 
274         String[] tests = {
275             "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
276         };
277 
278         compareArray(c, tests);
279     }
280 
281 
282     // Bad canonicalization of same-class combining characters
283     //

284     public void Test4076676() {
285         // These combining characters are all in the same class, so they should not
286         // be reordered, and they should compare as unequal.
287         String s1 = "A\u0301\u0302\u0300";
288         String s2 = "A\u0302\u0300\u0301";
289 
290         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
291         c.setStrength(Collator.TERTIARY);
292 
293         if (c.compare(s1,s2) == 0) {
294             errln("Same-class combining chars were reordered");
295         }
296     }
297 
298 
299     // RuleBasedCollator.equals(null) throws NullPointerException
300     //

301     public void Test4079231() {
302         try {
303             if (en_us.equals(null)) {
304                 errln("en_us.equals(null) returned true");
305             }
306         }
307         catch (Exception e) {
308             errln("en_us.equals(null) threw " + e.toString());
309         }
310     }
311 
312     // RuleBasedCollator breaks on "< a < bb" rule
313     //

314     public void Test4078588() throws ParseException {
315         RuleBasedCollator rbc=new RuleBasedCollator("< a < bb");
316 
317         int result = rbc.compare("a","bb");
318 
319         if (result != -1) {
320             errln("Compare(a,bb) returned " + result + "; expected -1");
321         }
322     }
323 
324     // Combining characters in different classes not reordered properly.
325     //

326     public void Test4081866() throws ParseException {
327         // These combining characters are all in different classes,
328         // so they should be reordered and the strings should compare as equal.
329         String s1 = "A\u0300\u0316\u0327\u0315";
330         String s2 = "A\u0327\u0316\u0315\u0300";
331 
332         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
333         c.setStrength(Collator.TERTIARY);
334 
335         // Now that the default collators are set to NO_DECOMPOSITION
336         // (as a result of fixing bug 4114077), we must set it explicitly
337         // when we're testing reordering behavior.  -- lwerner, 5/5/98
338         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
339 
340         if (c.compare(s1,s2) != 0) {
341             errln("Combining chars were not reordered");
342         }
343     }
344 
345     // string comparison errors in Scandinavian collators
346     //

347     public void Test4087241() {
348         RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(
349                                                         Locale.of("da", "DK"));
350         c.setStrength(Collator.SECONDARY);
351 
352         String[] tests = {
353             "\u007a",   "<",    "\u00e6",       // z        < ae
354             "a\u0308",  "<",    "a\u030a",      // a-unlaut < a-ring
355             "Y",        "<",    "u\u0308",      // Y        < u-umlaut
356         };
357 
358         compareArray(c, tests);
359     }
360 
361     // CollationKey takes ignorable strings into account when it shouldn't
362     //

363     public void Test4087243() {
364         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
365         c.setStrength(Collator.TERTIARY);
366 
367         String[] tests = {
368             "123",      "=",    "123\u0001",        // 1 2 3  =  1 2 3 ctrl-A
369         };
370 
371         compareArray(c, tests);
372     }
373 
374     // Mu/micro conflict
375     // Micro symbol and greek lowercase letter Mu should sort identically
376     //

377     public void Test4092260() {
378         Collator c = Collator.getInstance(Locale.of("el"));
379 
380         // will only be equal when FULL_DECOMPOSITION is used
381         c.setDecomposition(Collator.FULL_DECOMPOSITION);
382 
383         String[] tests = {
384             "\u00B5",      "=",    "\u03BC",
385         };
386 
387         compareArray(c, tests);
388     }
389 
390     void Test4095316() {
391         Collator c = Collator.getInstance(Locale.of("el", "GR"));
392         c.setStrength(Collator.TERTIARY);
393         // javadocs for RuleBasedCollator clearly specify that characters containing compatability
394         // chars MUST use FULL_DECOMPOSITION to get accurate comparisons.
395         c.setDecomposition(Collator.FULL_DECOMPOSITION);
396 
397         String[] tests = {
398             "\u03D4",      "=",    "\u03AB",
399         };
400 
401         compareArray(c, tests);
402     }
403 

404     public void Test4101940() {
405         try {
406             RuleBasedCollator c = new RuleBasedCollator("< a < b");
407             CollationElementIterator i = c.getCollationElementIterator("");
408             i.reset();
409 
410             if (i.next() != i.NULLORDER) {
411                 errln("next did not return NULLORDER");
412             }
413         }
414         catch (Exception e) {
415             errln("Caught " + e );
416         }
417     }
418 
419     // Collator.compare not handling spaces properly
420     //

421     public void Test4103436() {
422         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
423         c.setStrength(Collator.TERTIARY);
424 
425         String[] tests = {
426             "file",      "<",    "file access",
427             "file",      "<",    "fileaccess",
428         };
429 
430         compareArray(c, tests);
431     }
432 
433     // Collation not Unicode conformant with Hangul syllables
434     //

435     public void Test4114076() {
436         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
437         c.setStrength(Collator.TERTIARY);
438 
439         //
440         // With Canonical decomposition, Hangul syllables should get decomposed
441         // into Jamo, but Jamo characters should not be decomposed into
442         // conjoining Jamo
443         //
444         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
445         String[] test1 = {
446             "\ud4db",   "=",    "\u1111\u1171\u11b6",
447         };
448         compareArray(c, test1);
449 
450         // Full decomposition result should be the same as canonical decomposition
451         // for all hangul.
452         c.setDecomposition(Collator.FULL_DECOMPOSITION);
453         compareArray(c, test1);
454 
455     }
456 
457 
458     // Collator.getCollationKey was hanging on certain character sequences
459     //

460     public void Test4124632() throws Exception {
461         Collator coll = Collator.getInstance(Locale.JAPAN);
462 
463         try {
464             coll.getCollationKey("A\u0308bc");
465         } catch (OutOfMemoryError e) {
466             errln("Ran out of memory -- probably an infinite loop");
467         }
468     }
469 
470     // sort order of french words with multiple accents has errors
471     //

472     public void Test4132736() {
473         Collator c = Collator.getInstance(Locale.FRANCE);
474 
475         String[] test1 = {
476             "e\u0300e\u0301",   "<",    "e\u0301e\u0300",
477             "e\u0300\u0301",    ">",    "e\u0301\u0300",
478         };
479         compareArray(c, test1);
480     }
481 
482     // The sorting using java.text.CollationKey is not in the exact order
483     //

484     public void Test4133509() {
485         String[] test1 = {
486             "Exception",    "<",    "ExceptionInInitializerError",
487             "Graphics",     "<",    "GraphicsEnvironment",
488             "String",       "<",    "StringBuffer",
489         };
490         compareArray(en_us, test1);
491     }
492 
493     // Collation with decomposition off doesn't work for Europe
494     //

495     public void Test4114077() {
496         // Ensure that we get the same results with decomposition off
497         // as we do with it on....
498 
499         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
500         c.setStrength(Collator.TERTIARY);
501 
502         String[] test1 = {
503             "\u00C0",        "=", "A\u0300",        // Should be equivalent
504             "p\u00eache",         ">", "p\u00e9ch\u00e9",
505             "\u0204",        "=", "E\u030F",
506             "\u01fa",        "=", "A\u030a\u0301",  // a-ring-acute -> a-ring, acute
507                                                     //   -> a, ring, acute
508             "A\u0300\u0316", "<", "A\u0316\u0300",  // No reordering --> unequal
509         };
510         c.setDecomposition(Collator.NO_DECOMPOSITION);
511         compareArray(c, test1);
512 
513         String[] test2 = {
514             "A\u0300\u0316", "=", "A\u0316\u0300",      // Reordering --> equal
515         };
516         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
517         compareArray(c, test2);
518     }
519 
520     // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
521     //

522     public void Test4141640() {
523         //
524         // Rather than just creating a Swedish collator, we might as well
525         // try to instantiate one for every locale available on the system
526         // in order to prevent this sort of bug from cropping up in the future
527         //
528         Locale[] locales = Collator.getAvailableLocales();
529 
530         for (int i = 0; i < locales.length; i++) {
531             try {
532                 Collator c = Collator.getInstance(locales[i]);
533             } catch (Exception e) {
534                 errln("Caught " + e + " creating collator for " + locales[i]);
535             }
536         }
537     }
538 
539     // getCollationKey throws exception for spanish text
540     // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
541     //

542     public void Test4139572() {
543         //
544         // Code pasted straight from the bug report
545         //
546         // create spanish locale and collator
547         Locale l = Locale.of("es", "es");
548         Collator col = Collator.getInstance(l);
549 
550         // this spanish phrase kills it!
551         col.getCollationKey("Nombre De Objeto");
552     }
553 
554     // RuleBasedCollator doesn't use getCollationElementIterator internally
555     //

556     public void Test4146160() throws ParseException {
557         //
558         // Use a custom collator class whose getCollationElementIterator
559         // methods increment a count....
560         //
561         My4146160Collator.count = 0;
562         new My4146160Collator().getCollationKey("1");
563         if (My4146160Collator.count < 1) {
564             errln("getCollationElementIterator not called");
565         }
566 
567         My4146160Collator.count = 0;
568         new My4146160Collator().compare("1", "2");
569         if (My4146160Collator.count < 1) {
570             errln("getCollationElementIterator not called");
571         }
572     }
573 
574     static class My4146160Collator extends RuleBasedCollator {
575         public My4146160Collator() throws ParseException {
576             super(Regression.en_us.getRules());
577         }
578 
579         public CollationElementIterator getCollationElementIterator(
580                                             String text) {
581             count++;
582             return super.getCollationElementIterator(text);
583         }
584         public CollationElementIterator getCollationElementIterator(
585                                             CharacterIterator text) {
586             count++;
587             return super.getCollationElementIterator(text);
588         }
589 
590         public static int count = 0;
591     };
592 
593     // CollationElementIterator.previous broken for expanding char sequences
594     //

595     public void Test4179686() throws ParseException {
596 
597         // Create a collator with a few expanding character sequences in it....
598         RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules()
599                                                     + " & ae ; \u00e4 & AE ; \u00c4"
600                                                     + " & oe ; \u00f6 & OE ; \u00d6"
601                                                     + " & ue ; \u00fc & UE ; \u00dc");
602 
603         String text = "T\u00f6ne"; // o-umlaut
604 
605         CollationElementIterator iter = coll.getCollationElementIterator(text);
606         Vector elements = new Vector();
607         int elem;
608 
609         // Iterate forward and collect all of the elements into a Vector
610         while ((elem = iter.next()) != iter.NULLORDER) {
611             elements.addElement(new Integer(elem));
612         }
613 
614         // Now iterate backward and make sure they're the same
615         int index = elements.size() - 1;
616         while ((elem = iter.previous()) != iter.NULLORDER) {
617             int expect = ((Integer)elements.elementAt(index)).intValue();
618 
619             if (elem != expect) {
620                 errln("Mismatch at index " + index
621                       + ": got " + Integer.toString(elem,16)
622                       + ", expected " + Integer.toString(expect,16));
623             }
624             index--;
625         }
626     }
627 

628     public void Test4244884() throws ParseException {
629         RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
630         coll = new RuleBasedCollator(coll.getRules()
631                 + " & C < ch , cH , Ch , CH < cat < crunchy");
632 
633         String[] testStrings = new String[] {
634             "car",
635             "cave",
636             "clamp",
637             "cramp",
638             "czar",
639             "church",
640             "catalogue",
641             "crunchy",
642             "dog"
643         };
644 
645         for (int i = 1; i < testStrings.length; i++) {
646             if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) {
647                 errln("error: \"" + testStrings[i - 1]
648                     + "\" is greater than or equal to \"" + testStrings[i]
649                     + "\".");
650             }
651         }
652     }
653 

654     public void Test4179216() throws ParseException {
655         // you can position a CollationElementIterator in the middle of
656         // a contracting character sequence, yielding a bogus collation
657         // element
658         RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
659         coll = new RuleBasedCollator(coll.getRules()
660                 + " & C < ch , cH , Ch , CH < cat < crunchy");
661         String testText = "church church catcatcher runcrunchynchy";
662         CollationElementIterator iter = coll.getCollationElementIterator(
663                 testText);
664 
665         // test that the "ch" combination works properly
666         iter.setOffset(4);
667         int elt4 = CollationElementIterator.primaryOrder(iter.next());
668 
669         iter.reset();
670         int elt0 = CollationElementIterator.primaryOrder(iter.next());
671 
672         iter.setOffset(5);
673         int elt5 = CollationElementIterator.primaryOrder(iter.next());
674 
675         if (elt4 != elt0 || elt5 != elt0)
676             errln("The collation elements at positions 0 (" + elt0 + "), 4 ("
677                     + elt4 + "), and 5 (" + elt5 + ") don't match.");
678 
679         // test that the "cat" combination works properly
680         iter.setOffset(14);
681         int elt14 = CollationElementIterator.primaryOrder(iter.next());
682 
683         iter.setOffset(15);
684         int elt15 = CollationElementIterator.primaryOrder(iter.next());
685 
686         iter.setOffset(16);
687         int elt16 = CollationElementIterator.primaryOrder(iter.next());
688 
689         iter.setOffset(17);
690         int elt17 = CollationElementIterator.primaryOrder(iter.next());
691 
692         iter.setOffset(18);
693         int elt18 = CollationElementIterator.primaryOrder(iter.next());
694 
695         iter.setOffset(19);
696         int elt19 = CollationElementIterator.primaryOrder(iter.next());
697 
698         if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
699                 || elt14 != elt18 || elt14 != elt19)
700             errln("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = "
701             + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17
702             + ", elt18 = " + elt18 + ", elt19 = " + elt19);
703 
704         // now generate a complete list of the collation elements,
705         // first using next() and then using setOffset(), and
706         // make sure both interfaces return the same set of elements
707         iter.reset();
708 
709         int elt = iter.next();
710         int count = 0;
711         while (elt != CollationElementIterator.NULLORDER) {
712             ++count;
713             elt = iter.next();
714         }
715 
716         String[] nextElements = new String[count];
717         String[] setOffsetElements = new String[count];
718         int lastPos = 0;
719 
720         iter.reset();
721         elt = iter.next();
722         count = 0;
723         while (elt != CollationElementIterator.NULLORDER) {
724             nextElements[count++] = testText.substring(lastPos, iter.getOffset());
725             lastPos = iter.getOffset();
726             elt = iter.next();
727         }
728         count = 0;
729         for (int i = 0; i < testText.length(); ) {
730             iter.setOffset(i);
731             lastPos = iter.getOffset();
732             elt = iter.next();
733             setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset());
734             i = iter.getOffset();
735         }
736         for (int i = 0; i < nextElements.length; i++) {
737             if (nextElements[i].equals(setOffsetElements[i])) {
738                 logln(nextElements[i]);
739             } else {
740                 errln("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded "
741                     + setOffsetElements[i]);
742             }
743         }
744     }
745 

746     public void Test4216006() throws Exception {
747         // rule parser barfs on "<\u00e0=a\u0300", and on other cases
748         // where the same token (after normalization) appears twice in a row
749         boolean caughtException = false;
750         try {
751             RuleBasedCollator dummy = new RuleBasedCollator("\u00e0<a\u0300");
752         }
753         catch (ParseException e) {
754             caughtException = true;
755         }
756         if (!caughtException) {
757             throw new Exception("\"a<a\" collation sequence didn't cause parse error!");
758         }
759 
760         RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300");
761         collator.setDecomposition(Collator.FULL_DECOMPOSITION);
762         collator.setStrength(Collator.IDENTICAL);
763 
764         String[] tests = {
765             "a\u0300", "=", "\u00e0",
766             "\u00e0",  "=", "a\u0300"
767         };
768 
769         compareArray(collator, tests);
770     }
771 

772     public void Test4171974() {
773         // test French accent ordering more thoroughly
774         String[] frenchList = {
775             "\u0075\u0075",     // u u
776             "\u00fc\u0075",     // u-umlaut u
777             "\u01d6\u0075",     // u-umlaut-macron u
778             "\u016b\u0075",     // u-macron u
779             "\u1e7b\u0075",     // u-macron-umlaut u
780             "\u0075\u00fc",     // u u-umlaut
781             "\u00fc\u00fc",     // u-umlaut u-umlaut
782             "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
783             "\u016b\u00fc",     // u-macron u-umlaut
784             "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
785             "\u0075\u01d6",     // u u-umlaut-macron
786             "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
787             "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
788             "\u016b\u01d6",     // u-macron u-umlaut-macron
789             "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
790             "\u0075\u016b",     // u u-macron
791             "\u00fc\u016b",     // u-umlaut u-macron
792             "\u01d6\u016b",     // u-umlaut-macron u-macron
793             "\u016b\u016b",     // u-macron u-macron
794             "\u1e7b\u016b",     // u-macron-umlaut u-macron
795             "\u0075\u1e7b",     // u u-macron-umlaut
796             "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
797             "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
798             "\u016b\u1e7b",     // u-macron u-macron-umlaut
799             "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
800         };
801         Collator french = Collator.getInstance(Locale.FRENCH);
802 
803         logln("Testing French order...");
804         checkListOrder(frenchList, french);
805 
806         logln("Testing French order without decomposition...");
807         french.setDecomposition(Collator.NO_DECOMPOSITION);
808         checkListOrder(frenchList, french);
809 
810         String[] englishList = {
811             "\u0075\u0075",     // u u
812             "\u0075\u00fc",     // u u-umlaut
813             "\u0075\u01d6",     // u u-umlaut-macron
814             "\u0075\u016b",     // u u-macron
815             "\u0075\u1e7b",     // u u-macron-umlaut
816             "\u00fc\u0075",     // u-umlaut u
817             "\u00fc\u00fc",     // u-umlaut u-umlaut
818             "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
819             "\u00fc\u016b",     // u-umlaut u-macron
820             "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
821             "\u01d6\u0075",     // u-umlaut-macron u
822             "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
823             "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
824             "\u01d6\u016b",     // u-umlaut-macron u-macron
825             "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
826             "\u016b\u0075",     // u-macron u
827             "\u016b\u00fc",     // u-macron u-umlaut
828             "\u016b\u01d6",     // u-macron u-umlaut-macron
829             "\u016b\u016b",     // u-macron u-macron
830             "\u016b\u1e7b",     // u-macron u-macron-umlaut
831             "\u1e7b\u0075",     // u-macron-umlaut u
832             "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
833             "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
834             "\u1e7b\u016b",     // u-macron-umlaut u-macron
835             "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
836         };
837         Collator english = Collator.getInstance(Locale.ENGLISH);
838 
839         logln("Testing English order...");
840         checkListOrder(englishList, english);
841 
842         logln("Testing English order without decomposition...");
843         english.setDecomposition(Collator.NO_DECOMPOSITION);
844         checkListOrder(englishList, english);
845     }
846 
847     private void checkListOrder(String[] sortedList, Collator c) {
848         // this function uses the specified Collator to make sure the
849         // passed-in list is already sorted into ascending order
850         for (int i = 0; i < sortedList.length - 1; i++) {
851             if (c.compare(sortedList[i], sortedList[i + 1]) >= 0) {
852                 errln("List out of order at element #" + i + ": "
853                         + prettify(sortedList[i]) + " >= "
854                         + prettify(sortedList[i + 1]));
855             }
856         }
857     }
858 
859     // CollationElementIterator set doesn't work propertly with next/prev

860     public void Test4663220() {
861         RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US);
862         CharacterIterator stringIter = new StringCharacterIterator("fox");
863         CollationElementIterator iter = collator.getCollationElementIterator(stringIter);
864 
865         int[] elements_next = new int[3];
866         logln("calling next:");
867         for (int i = 0; i < 3; ++i) {
868             logln("[" + i + "] " + (elements_next[i] = iter.next()));
869         }
870 
871         int[] elements_fwd = new int[3];
872         logln("calling set/next:");
873         for (int i = 0; i < 3; ++i) {
874             iter.setOffset(i);
875             logln("[" + i + "] " + (elements_fwd[i] = iter.next()));
876         }
877 
878         for (int i = 0; i < 3; ++i) {
879             if (elements_next[i] != elements_fwd[i]) {
880                 errln("mismatch at position " + i +
881                       ": " + elements_next[i] +
882                       " != " + elements_fwd[i]);
883             }
884         }
885     }
886 
887     //------------------------------------------------------------------------
888     // Internal utilities
889     //
890     private void compareArray(Collator c, String[] tests) {
891         for (int i = 0; i < tests.length; i += 3) {
892 
893             int expect = 0;
894             if (tests[i+1].equals("<")) {
895                 expect = -1;
896             } else if (tests[i+1].equals(">")) {
897                 expect = 1;
898             } else if (tests[i+1].equals("=")) {
899                 expect = 0;
900             } else {
901                 expect = Integer.decode(tests[i+1]).intValue();
902             }
903 
904             int result = c.compare(tests[i], tests[i+2]);
905             if (sign(result) != sign(expect))
906             {
907                 errln( i/3 + ": compare(" + prettify(tests[i])
908                                     + " , " + prettify(tests[i+2])
909                                     + ") got " + result + "; expected " + expect);
910             }
911             else
912             {
913                 // Collator.compare worked OK; now try the collation keys
914                 CollationKey k1 = c.getCollationKey(tests[i]);
915                 CollationKey k2 = c.getCollationKey(tests[i+2]);
916 
917                 result = k1.compareTo(k2);
918                 if (sign(result) != sign(expect)) {
919                     errln( i/3 + ": key(" + prettify(tests[i])
920                                         + ").compareTo(key(" + prettify(tests[i+2])
921                                         + ")) got " + result + "; expected " + expect);
922 
923                     errln("  " + prettify(k1) + " vs. " + prettify(k2));
924                 }
925             }
926         }
927     }
928 
929     private static final int sign(int i) {
930         if (i < 0) return -1;
931         if (i > 0) return 1;
932         return 0;
933     }
934 
935 
936     static RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
937 
938     String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
939     String test2 = "Xf ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
940     String test3 = "a\u00FCbeck Gr\u00F6\u00DFe L\u00FCbeck";
941 }

  1 /*
  2  * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  */
 23 
 24 /**
 25  * @test
 26  * @bug 4048446 4051866 4053636 4054238 4054734 4054736 4058613 4059820 4060154
 27  *      4062418 4065540 4066189 4066696 4076676 4078588 4079231 4081866 4087241
 28  *      4087243 4092260 4095316 4101940 4103436 4114076 4114077 4124632 4132736
 29  *      4133509 4139572 4141640 4179126 4179686 4244884 4663220
 30  * @library /java/text/testlib
 31  * @summary Regression tests for Collation and associated classes
 32  * @modules jdk.localedata
 33  * @run junit Regression
 34  */
 35 /*
 36 (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
 37 (C) Copyright IBM Corp. 1996 - All Rights Reserved
 38 
 39   The original version of this source code and documentation is copyrighted and
 40 owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are
 41 provided under terms of a License Agreement between Taligent and Sun. This
 42 technology is protected by multiple US and International patents. This notice and
 43 attribution to Taligent may not be removed.
 44   Taligent is a registered trademark of Taligent, Inc.
 45 */
 46 
 47 import java.text.*;
 48 import java.util.Locale;
 49 import java.util.Vector;
 50 
 51 import org.junit.jupiter.api.Test;
 52 
 53 import static org.junit.jupiter.api.Assertions.fail;
 54 
 55 
 56 public class Regression {

 57 
 58     // CollationElementIterator.reset() doesn't work
 59     //
 60     @Test
 61     public void Test4048446() {
 62         CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
 63         CollationElementIterator i2 = en_us.getCollationElementIterator(test1);
 64 
 65         while ( i1.next() != CollationElementIterator.NULLORDER ) {
 66         }
 67         i1.reset();
 68 
 69         TestUtils.compareCollationElementIters(i1, i2);
 70     }
 71 
 72 
 73     // Collator -> rules -> Collator round-trip broken for expanding characters
 74     //
 75     @Test
 76     public void Test4051866() throws ParseException {
 77         // Build a collator containing expanding characters
 78         RuleBasedCollator c1 = new RuleBasedCollator("< o "
 79                                                     +"& oe ,o\u3080"
 80                                                     +"& oe ,\u1530 ,O"
 81                                                     +"& OE ,O\u3080"
 82                                                     +"& OE ,\u1520"
 83                                                     +"< p ,P");
 84 
 85         // Build another using the rules from  the first
 86         RuleBasedCollator c2 = new RuleBasedCollator(c1.getRules());
 87 
 88         // Make sure they're the same
 89         if (!c1.getRules().equals(c2.getRules())) {
 90             fail("Rules are not equal");
 91         }
 92     }
 93 
 94     // Collator thinks "black-bird" == "black"
 95     //
 96     @Test
 97     public void Test4053636() {
 98         if (en_us.equals("black-bird","black")) {
 99             fail("black-bird == black");
100         }
101     }
102 
103 
104     // CollationElementIterator will not work correctly if the associated
105     // Collator object's mode is changed
106     //
107     @Test
108     public void Test4054238() {
109         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
110 
111         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
112         CollationElementIterator i1 = en_us.getCollationElementIterator(test3);
113 
114         c.setDecomposition(Collator.NO_DECOMPOSITION);
115         CollationElementIterator i2 = en_us.getCollationElementIterator(test3);
116 
117         // At this point, BOTH iterators should use NO_DECOMPOSITION, since the
118         // collator itself is in that mode
119         TestUtils.compareCollationElementIters(i1, i2);
120     }
121 
122     // Collator.IDENTICAL documented but not implemented
123     //
124     @Test
125     public void Test4054734() {
126         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
127         try {
128             c.setStrength(Collator.IDENTICAL);
129         }
130         catch (Exception e) {
131             fail("Caught " + e.toString() + " setting Collator.IDENTICAL");
132         }
133 
134         String[] decomp = {
135             "\u0001",   "<",    "\u0002",
136             "\u0001",   "=",    "\u0001",
137             "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
138             "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
139         };
140         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
141         compareArray(c, decomp);
142 
143         String[] nodecomp = {
144             "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
145         };
146         c.setDecomposition(Collator.NO_DECOMPOSITION);
147         compareArray(c, nodecomp);
148     }
149 
150     // Full Decomposition mode not implemented
151     //
152     @Test
153     public void Test4054736() {
154         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
155         c.setDecomposition(Collator.FULL_DECOMPOSITION);
156 
157         String[] tests = {
158             "\uFB4f", "=", "\u05D0\u05DC",  // Alef-Lamed vs. Alef, Lamed
159         };
160 
161         compareArray(c, tests);
162     }
163 
164     // Collator.getInstance() causes an ArrayIndexOutofBoundsException for Korean
165     //
166     @Test
167     public void Test4058613() {
168         // Creating a default collator doesn't work when Korean is the default
169         // locale
170 
171         Locale oldDefault = Locale.getDefault();
172 
173         Locale.setDefault( Locale.KOREAN );
174         try {
175             Collator c = Collator.getInstance();
176 
177             // Since the fix to this bug was to turn of decomposition for Korean collators,
178             // ensure that's what we got
179             if (c.getDecomposition() != Collator.NO_DECOMPOSITION) {
180               fail("Decomposition is not set to NO_DECOMPOSITION");
181             }
182         }
183         finally {
184             Locale.setDefault(oldDefault);
185         }
186     }
187 
188     // RuleBasedCollator.getRules does not return the exact pattern as input
189     // for expanding character sequences
190     //
191     @Test
192     public void Test4059820() {
193         RuleBasedCollator c = null;
194         try {
195             c = new RuleBasedCollator("< a < b , c/a < d < z");
196         } catch (ParseException e) {
197             fail("Exception building collator: " + e.toString());
198             return;
199         }
200         if ( c.getRules().indexOf("c/a") == -1) {
201             fail("returned rules do not contain 'c/a'");
202         }
203     }
204 
205     // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
206     //
207     @Test
208     public void Test4060154() {
209         RuleBasedCollator c = null;
210         try {
211             c = new RuleBasedCollator("< g, G < h, H < i, I < j, J"
212                                       + " & H < \u0131, \u0130, i, I" );
213         } catch (ParseException e) {
214             fail("Exception building collator: " + e.toString());
215             return;
216         }
217         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
218 
219         String[] tertiary = {
220             "A",        "<",    "B",
221             "H",        "<",    "\u0131",
222             "H",        "<",    "I",
223             "\u0131",   "<",    "\u0130",
224             "\u0130",   "<",    "i",
225             "\u0130",   ">",    "H",
226         };
227         c.setStrength(Collator.TERTIARY);
228         compareArray(c, tertiary);
229 
230         String[] secondary = {
231             "H",        "<",    "I",
232             "\u0131",   "=",    "\u0130",
233         };
234         c.setStrength(Collator.PRIMARY);
235         compareArray(c, secondary);
236     };
237 
238     // Secondary/Tertiary comparison incorrect in French Secondary
239     //
240     @Test
241     public void Test4062418() throws ParseException {
242         RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
243         c.setStrength(Collator.SECONDARY);
244 
245         String[] tests = {
246                 "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
247         };
248 
249         compareArray(c, tests);
250     }
251 
252     // Collator.compare() method broken if either string contains spaces
253     //
254     @Test
255     public void Test4065540() {
256         if (en_us.compare("abcd e", "abcd f") == 0) {
257             fail("'abcd e' == 'abcd f'");
258         }
259     }
260 
261     // Unicode characters need to be recursively decomposed to get the
262     // correct result. For example,
263     // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
264     //
265     @Test
266     public void Test4066189() {
267         String test1 = "\u1EB1";
268         String test2 = "a\u0306\u0300";
269 
270         RuleBasedCollator c1 = (RuleBasedCollator) en_us.clone();
271         c1.setDecomposition(Collator.FULL_DECOMPOSITION);
272         CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
273 
274         RuleBasedCollator c2 = (RuleBasedCollator) en_us.clone();
275         c2.setDecomposition(Collator.NO_DECOMPOSITION);
276         CollationElementIterator i2 = en_us.getCollationElementIterator(test2);
277 
278         TestUtils.compareCollationElementIters(i1, i2);
279     }
280 
281     // French secondary collation checking at the end of compare iteration fails
282     //
283     @Test
284     public void Test4066696() {
285         RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
286         c.setStrength(Collator.SECONDARY);
287 
288         String[] tests = {
289             "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
290         };
291 
292         compareArray(c, tests);
293     }
294 
295 
296     // Bad canonicalization of same-class combining characters
297     //
298     @Test
299     public void Test4076676() {
300         // These combining characters are all in the same class, so they should not
301         // be reordered, and they should compare as unequal.
302         String s1 = "A\u0301\u0302\u0300";
303         String s2 = "A\u0302\u0300\u0301";
304 
305         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
306         c.setStrength(Collator.TERTIARY);
307 
308         if (c.compare(s1,s2) == 0) {
309             fail("Same-class combining chars were reordered");
310         }
311     }
312 
313 
314     // RuleBasedCollator.equals(null) throws NullPointerException
315     //
316     @Test
317     public void Test4079231() {
318         try {
319             if (en_us.equals(null)) {
320                 fail("en_us.equals(null) returned true");
321             }
322         }
323         catch (Exception e) {
324             fail("en_us.equals(null) threw " + e.toString());
325         }
326     }
327 
328     // RuleBasedCollator breaks on "< a < bb" rule
329     //
330     @Test
331     public void Test4078588() throws ParseException {
332         RuleBasedCollator rbc=new RuleBasedCollator("< a < bb");
333 
334         int result = rbc.compare("a","bb");
335 
336         if (result != -1) {
337             fail("Compare(a,bb) returned " + result + "; expected -1");
338         }
339     }
340 
341     // Combining characters in different classes not reordered properly.
342     //
343     @Test
344     public void Test4081866() throws ParseException {
345         // These combining characters are all in different classes,
346         // so they should be reordered and the strings should compare as equal.
347         String s1 = "A\u0300\u0316\u0327\u0315";
348         String s2 = "A\u0327\u0316\u0315\u0300";
349 
350         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
351         c.setStrength(Collator.TERTIARY);
352 
353         // Now that the default collators are set to NO_DECOMPOSITION
354         // (as a result of fixing bug 4114077), we must set it explicitly
355         // when we're testing reordering behavior.  -- lwerner, 5/5/98
356         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
357 
358         if (c.compare(s1,s2) != 0) {
359             fail("Combining chars were not reordered");
360         }
361     }
362 
363     // string comparison errors in Scandinavian collators
364     //
365     @Test
366     public void Test4087241() {
367         RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(
368                                                         Locale.of("da", "DK"));
369         c.setStrength(Collator.SECONDARY);
370 
371         String[] tests = {
372             "\u007a",   "<",    "\u00e6",       // z        < ae
373             "a\u0308",  "<",    "a\u030a",      // a-unlaut < a-ring
374             "Y",        "<",    "u\u0308",      // Y        < u-umlaut
375         };
376 
377         compareArray(c, tests);
378     }
379 
380     // CollationKey takes ignorable strings into account when it shouldn't
381     //
382     @Test
383     public void Test4087243() {
384         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
385         c.setStrength(Collator.TERTIARY);
386 
387         String[] tests = {
388             "123",      "=",    "123\u0001",        // 1 2 3  =  1 2 3 ctrl-A
389         };
390 
391         compareArray(c, tests);
392     }
393 
394     // Mu/micro conflict
395     // Micro symbol and greek lowercase letter Mu should sort identically
396     //
397     @Test
398     public void Test4092260() {
399         Collator c = Collator.getInstance(Locale.of("el"));
400 
401         // will only be equal when FULL_DECOMPOSITION is used
402         c.setDecomposition(Collator.FULL_DECOMPOSITION);
403 
404         String[] tests = {
405             "\u00B5",      "=",    "\u03BC",
406         };
407 
408         compareArray(c, tests);
409     }
410 
411     void Test4095316() {
412         Collator c = Collator.getInstance(Locale.of("el", "GR"));
413         c.setStrength(Collator.TERTIARY);
414         // javadocs for RuleBasedCollator clearly specify that characters containing compatability
415         // chars MUST use FULL_DECOMPOSITION to get accurate comparisons.
416         c.setDecomposition(Collator.FULL_DECOMPOSITION);
417 
418         String[] tests = {
419             "\u03D4",      "=",    "\u03AB",
420         };
421 
422         compareArray(c, tests);
423     }
424 
425     @Test
426     public void Test4101940() {
427         try {
428             RuleBasedCollator c = new RuleBasedCollator("< a < b");
429             CollationElementIterator i = c.getCollationElementIterator("");
430             i.reset();
431 
432             if (i.next() != i.NULLORDER) {
433                 fail("next did not return NULLORDER");
434             }
435         }
436         catch (Exception e) {
437             fail("Caught " + e );
438         }
439     }
440 
441     // Collator.compare not handling spaces properly
442     //
443     @Test
444     public void Test4103436() {
445         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
446         c.setStrength(Collator.TERTIARY);
447 
448         String[] tests = {
449             "file",      "<",    "file access",
450             "file",      "<",    "fileaccess",
451         };
452 
453         compareArray(c, tests);
454     }
455 
456     // Collation not Unicode conformant with Hangul syllables
457     //
458     @Test
459     public void Test4114076() {
460         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
461         c.setStrength(Collator.TERTIARY);
462 
463         //
464         // With Canonical decomposition, Hangul syllables should get decomposed
465         // into Jamo, but Jamo characters should not be decomposed into
466         // conjoining Jamo
467         //
468         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
469         String[] test1 = {
470             "\ud4db",   "=",    "\u1111\u1171\u11b6",
471         };
472         compareArray(c, test1);
473 
474         // Full decomposition result should be the same as canonical decomposition
475         // for all hangul.
476         c.setDecomposition(Collator.FULL_DECOMPOSITION);
477         compareArray(c, test1);
478 
479     }
480 
481 
482     // Collator.getCollationKey was hanging on certain character sequences
483     //
484     @Test
485     public void Test4124632() throws Exception {
486         Collator coll = Collator.getInstance(Locale.JAPAN);
487 
488         try {
489             coll.getCollationKey("A\u0308bc");
490         } catch (OutOfMemoryError e) {
491             fail("Ran out of memory -- probably an infinite loop");
492         }
493     }
494 
495     // sort order of french words with multiple accents has errors
496     //
497     @Test
498     public void Test4132736() {
499         Collator c = Collator.getInstance(Locale.FRANCE);
500 
501         String[] test1 = {
502             "e\u0300e\u0301",   "<",    "e\u0301e\u0300",
503             "e\u0300\u0301",    ">",    "e\u0301\u0300",
504         };
505         compareArray(c, test1);
506     }
507 
508     // The sorting using java.text.CollationKey is not in the exact order
509     //
510     @Test
511     public void Test4133509() {
512         String[] test1 = {
513             "Exception",    "<",    "ExceptionInInitializerError",
514             "Graphics",     "<",    "GraphicsEnvironment",
515             "String",       "<",    "StringBuffer",
516         };
517         compareArray(en_us, test1);
518     }
519 
520     // Collation with decomposition off doesn't work for Europe
521     //
522     @Test
523     public void Test4114077() {
524         // Ensure that we get the same results with decomposition off
525         // as we do with it on....
526 
527         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
528         c.setStrength(Collator.TERTIARY);
529 
530         String[] test1 = {
531             "\u00C0",        "=", "A\u0300",        // Should be equivalent
532             "p\u00eache",         ">", "p\u00e9ch\u00e9",
533             "\u0204",        "=", "E\u030F",
534             "\u01fa",        "=", "A\u030a\u0301",  // a-ring-acute -> a-ring, acute
535                                                     //   -> a, ring, acute
536             "A\u0300\u0316", "<", "A\u0316\u0300",  // No reordering --> unequal
537         };
538         c.setDecomposition(Collator.NO_DECOMPOSITION);
539         compareArray(c, test1);
540 
541         String[] test2 = {
542             "A\u0300\u0316", "=", "A\u0316\u0300",      // Reordering --> equal
543         };
544         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
545         compareArray(c, test2);
546     }
547 
548     // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
549     //
550     @Test
551     public void Test4141640() {
552         //
553         // Rather than just creating a Swedish collator, we might as well
554         // try to instantiate one for every locale available on the system
555         // in order to prevent this sort of bug from cropping up in the future
556         //
557         Locale[] locales = Collator.getAvailableLocales();
558 
559         for (int i = 0; i < locales.length; i++) {
560             try {
561                 Collator c = Collator.getInstance(locales[i]);
562             } catch (Exception e) {
563                 fail("Caught " + e + " creating collator for " + locales[i]);
564             }
565         }
566     }
567 
568     // getCollationKey throws exception for spanish text
569     // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
570     //
571     @Test
572     public void Test4139572() {
573         //
574         // Code pasted straight from the bug report
575         //
576         // create spanish locale and collator
577         Locale l = Locale.of("es", "es");
578         Collator col = Collator.getInstance(l);
579 
580         // this spanish phrase kills it!
581         col.getCollationKey("Nombre De Objeto");
582     }
583 
584     // RuleBasedCollator doesn't use getCollationElementIterator internally
585     //
586     @Test
587     public void Test4146160() throws ParseException {
588         //
589         // Use a custom collator class whose getCollationElementIterator
590         // methods increment a count....
591         //
592         My4146160Collator.count = 0;
593         new My4146160Collator().getCollationKey("1");
594         if (My4146160Collator.count < 1) {
595             fail("getCollationElementIterator not called");
596         }
597 
598         My4146160Collator.count = 0;
599         new My4146160Collator().compare("1", "2");
600         if (My4146160Collator.count < 1) {
601             fail("getCollationElementIterator not called");
602         }
603     }
604 
605     static class My4146160Collator extends RuleBasedCollator {
606         public My4146160Collator() throws ParseException {
607             super(Regression.en_us.getRules());
608         }
609 
610         public CollationElementIterator getCollationElementIterator(
611                                             String text) {
612             count++;
613             return super.getCollationElementIterator(text);
614         }
615         public CollationElementIterator getCollationElementIterator(
616                                             CharacterIterator text) {
617             count++;
618             return super.getCollationElementIterator(text);
619         }
620 
621         public static int count = 0;
622     };
623 
624     // CollationElementIterator.previous broken for expanding char sequences
625     //
626     @Test
627     public void Test4179686() throws ParseException {
628 
629         // Create a collator with a few expanding character sequences in it....
630         RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules()
631                                                     + " & ae ; \u00e4 & AE ; \u00c4"
632                                                     + " & oe ; \u00f6 & OE ; \u00d6"
633                                                     + " & ue ; \u00fc & UE ; \u00dc");
634 
635         String text = "T\u00f6ne"; // o-umlaut
636 
637         CollationElementIterator iter = coll.getCollationElementIterator(text);
638         Vector elements = new Vector();
639         int elem;
640 
641         // Iterate forward and collect all of the elements into a Vector
642         while ((elem = iter.next()) != iter.NULLORDER) {
643             elements.addElement(new Integer(elem));
644         }
645 
646         // Now iterate backward and make sure they're the same
647         int index = elements.size() - 1;
648         while ((elem = iter.previous()) != iter.NULLORDER) {
649             int expect = ((Integer)elements.elementAt(index)).intValue();
650 
651             if (elem != expect) {
652                 fail("Mismatch at index " + index
653                       + ": got " + Integer.toString(elem,16)
654                       + ", expected " + Integer.toString(expect,16));
655             }
656             index--;
657         }
658     }
659 
660     @Test
661     public void Test4244884() throws ParseException {
662         RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
663         coll = new RuleBasedCollator(coll.getRules()
664                 + " & C < ch , cH , Ch , CH < cat < crunchy");
665 
666         String[] testStrings = new String[] {
667             "car",
668             "cave",
669             "clamp",
670             "cramp",
671             "czar",
672             "church",
673             "catalogue",
674             "crunchy",
675             "dog"
676         };
677 
678         for (int i = 1; i < testStrings.length; i++) {
679             if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) {
680                 fail("error: \"" + testStrings[i - 1]
681                     + "\" is greater than or equal to \"" + testStrings[i]
682                     + "\".");
683             }
684         }
685     }
686 
687     @Test
688     public void Test4179216() throws ParseException {
689         // you can position a CollationElementIterator in the middle of
690         // a contracting character sequence, yielding a bogus collation
691         // element
692         RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
693         coll = new RuleBasedCollator(coll.getRules()
694                 + " & C < ch , cH , Ch , CH < cat < crunchy");
695         String testText = "church church catcatcher runcrunchynchy";
696         CollationElementIterator iter = coll.getCollationElementIterator(
697                 testText);
698 
699         // test that the "ch" combination works properly
700         iter.setOffset(4);
701         int elt4 = CollationElementIterator.primaryOrder(iter.next());
702 
703         iter.reset();
704         int elt0 = CollationElementIterator.primaryOrder(iter.next());
705 
706         iter.setOffset(5);
707         int elt5 = CollationElementIterator.primaryOrder(iter.next());
708 
709         if (elt4 != elt0 || elt5 != elt0)
710             fail("The collation elements at positions 0 (" + elt0 + "), 4 ("
711                     + elt4 + "), and 5 (" + elt5 + ") don't match.");
712 
713         // test that the "cat" combination works properly
714         iter.setOffset(14);
715         int elt14 = CollationElementIterator.primaryOrder(iter.next());
716 
717         iter.setOffset(15);
718         int elt15 = CollationElementIterator.primaryOrder(iter.next());
719 
720         iter.setOffset(16);
721         int elt16 = CollationElementIterator.primaryOrder(iter.next());
722 
723         iter.setOffset(17);
724         int elt17 = CollationElementIterator.primaryOrder(iter.next());
725 
726         iter.setOffset(18);
727         int elt18 = CollationElementIterator.primaryOrder(iter.next());
728 
729         iter.setOffset(19);
730         int elt19 = CollationElementIterator.primaryOrder(iter.next());
731 
732         if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
733                 || elt14 != elt18 || elt14 != elt19)
734             fail("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = "
735             + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17
736             + ", elt18 = " + elt18 + ", elt19 = " + elt19);
737 
738         // now generate a complete list of the collation elements,
739         // first using next() and then using setOffset(), and
740         // make sure both interfaces return the same set of elements
741         iter.reset();
742 
743         int elt = iter.next();
744         int count = 0;
745         while (elt != CollationElementIterator.NULLORDER) {
746             ++count;
747             elt = iter.next();
748         }
749 
750         String[] nextElements = new String[count];
751         String[] setOffsetElements = new String[count];
752         int lastPos = 0;
753 
754         iter.reset();
755         elt = iter.next();
756         count = 0;
757         while (elt != CollationElementIterator.NULLORDER) {
758             nextElements[count++] = testText.substring(lastPos, iter.getOffset());
759             lastPos = iter.getOffset();
760             elt = iter.next();
761         }
762         count = 0;
763         for (int i = 0; i < testText.length(); ) {
764             iter.setOffset(i);
765             lastPos = iter.getOffset();
766             elt = iter.next();
767             setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset());
768             i = iter.getOffset();
769         }
770         for (int i = 0; i < nextElements.length; i++) {
771             if (nextElements[i].equals(setOffsetElements[i])) {
772                 System.out.println(nextElements[i]);
773             } else {
774                 fail("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded "
775                     + setOffsetElements[i]);
776             }
777         }
778     }
779 
780     @Test
781     public void Test4216006() throws Exception {
782         // rule parser barfs on "<\u00e0=a\u0300", and on other cases
783         // where the same token (after normalization) appears twice in a row
784         boolean caughtException = false;
785         try {
786             RuleBasedCollator dummy = new RuleBasedCollator("\u00e0<a\u0300");
787         }
788         catch (ParseException e) {
789             caughtException = true;
790         }
791         if (!caughtException) {
792             throw new Exception("\"a<a\" collation sequence didn't cause parse error!");
793         }
794 
795         RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300");
796         collator.setDecomposition(Collator.FULL_DECOMPOSITION);
797         collator.setStrength(Collator.IDENTICAL);
798 
799         String[] tests = {
800             "a\u0300", "=", "\u00e0",
801             "\u00e0",  "=", "a\u0300"
802         };
803 
804         compareArray(collator, tests);
805     }
806 
807     @Test
808     public void Test4171974() {
809         // test French accent ordering more thoroughly
810         String[] frenchList = {
811             "\u0075\u0075",     // u u
812             "\u00fc\u0075",     // u-umlaut u
813             "\u01d6\u0075",     // u-umlaut-macron u
814             "\u016b\u0075",     // u-macron u
815             "\u1e7b\u0075",     // u-macron-umlaut u
816             "\u0075\u00fc",     // u u-umlaut
817             "\u00fc\u00fc",     // u-umlaut u-umlaut
818             "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
819             "\u016b\u00fc",     // u-macron u-umlaut
820             "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
821             "\u0075\u01d6",     // u u-umlaut-macron
822             "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
823             "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
824             "\u016b\u01d6",     // u-macron u-umlaut-macron
825             "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
826             "\u0075\u016b",     // u u-macron
827             "\u00fc\u016b",     // u-umlaut u-macron
828             "\u01d6\u016b",     // u-umlaut-macron u-macron
829             "\u016b\u016b",     // u-macron u-macron
830             "\u1e7b\u016b",     // u-macron-umlaut u-macron
831             "\u0075\u1e7b",     // u u-macron-umlaut
832             "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
833             "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
834             "\u016b\u1e7b",     // u-macron u-macron-umlaut
835             "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
836         };
837         Collator french = Collator.getInstance(Locale.FRENCH);
838 
839         System.out.println("Testing French order...");
840         checkListOrder(frenchList, french);
841 
842         System.out.println("Testing French order without decomposition...");
843         french.setDecomposition(Collator.NO_DECOMPOSITION);
844         checkListOrder(frenchList, french);
845 
846         String[] englishList = {
847             "\u0075\u0075",     // u u
848             "\u0075\u00fc",     // u u-umlaut
849             "\u0075\u01d6",     // u u-umlaut-macron
850             "\u0075\u016b",     // u u-macron
851             "\u0075\u1e7b",     // u u-macron-umlaut
852             "\u00fc\u0075",     // u-umlaut u
853             "\u00fc\u00fc",     // u-umlaut u-umlaut
854             "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
855             "\u00fc\u016b",     // u-umlaut u-macron
856             "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
857             "\u01d6\u0075",     // u-umlaut-macron u
858             "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
859             "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
860             "\u01d6\u016b",     // u-umlaut-macron u-macron
861             "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
862             "\u016b\u0075",     // u-macron u
863             "\u016b\u00fc",     // u-macron u-umlaut
864             "\u016b\u01d6",     // u-macron u-umlaut-macron
865             "\u016b\u016b",     // u-macron u-macron
866             "\u016b\u1e7b",     // u-macron u-macron-umlaut
867             "\u1e7b\u0075",     // u-macron-umlaut u
868             "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
869             "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
870             "\u1e7b\u016b",     // u-macron-umlaut u-macron
871             "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
872         };
873         Collator english = Collator.getInstance(Locale.ENGLISH);
874 
875         System.out.println("Testing English order...");
876         checkListOrder(englishList, english);
877 
878         System.out.println("Testing English order without decomposition...");
879         english.setDecomposition(Collator.NO_DECOMPOSITION);
880         checkListOrder(englishList, english);
881     }
882 
883     private void checkListOrder(String[] sortedList, Collator c) {
884         // this function uses the specified Collator to make sure the
885         // passed-in list is already sorted into ascending order
886         for (int i = 0; i < sortedList.length - 1; i++) {
887             if (c.compare(sortedList[i], sortedList[i + 1]) >= 0) {
888                 fail("List out of order at element #" + i + ": "
889                         + TestUtils.prettify(sortedList[i]) + " >= "
890                         + TestUtils.prettify(sortedList[i + 1]));
891             }
892         }
893     }
894 
895     // CollationElementIterator set doesn't work propertly with next/prev
896     @Test
897     public void Test4663220() {
898         RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US);
899         CharacterIterator stringIter = new StringCharacterIterator("fox");
900         CollationElementIterator iter = collator.getCollationElementIterator(stringIter);
901 
902         int[] elements_next = new int[3];
903         System.out.println("calling next:");
904         for (int i = 0; i < 3; ++i) {
905             System.out.println("[" + i + "] " + (elements_next[i] = iter.next()));
906         }
907 
908         int[] elements_fwd = new int[3];
909         System.out.println("calling set/next:");
910         for (int i = 0; i < 3; ++i) {
911             iter.setOffset(i);
912             System.out.println("[" + i + "] " + (elements_fwd[i] = iter.next()));
913         }
914 
915         for (int i = 0; i < 3; ++i) {
916             if (elements_next[i] != elements_fwd[i]) {
917                 fail("mismatch at position " + i +
918                       ": " + elements_next[i] +
919                       " != " + elements_fwd[i]);
920             }
921         }
922     }
923 
924     //------------------------------------------------------------------------
925     // Internal utilities
926     //
927     private void compareArray(Collator c, String[] tests) {
928         for (int i = 0; i < tests.length; i += 3) {
929 
930             int expect = 0;
931             if (tests[i+1].equals("<")) {
932                 expect = -1;
933             } else if (tests[i+1].equals(">")) {
934                 expect = 1;
935             } else if (tests[i+1].equals("=")) {
936                 expect = 0;
937             } else {
938                 expect = Integer.decode(tests[i+1]).intValue();
939             }
940 
941             int result = c.compare(tests[i], tests[i+2]);
942             if (sign(result) != sign(expect))
943             {
944                 fail( i/3 + ": compare(" + TestUtils.prettify(tests[i])
945                                     + " , " + TestUtils.prettify(tests[i+2])
946                                     + ") got " + result + "; expected " + expect);
947             }
948             else
949             {
950                 // Collator.compare worked OK; now try the collation keys
951                 CollationKey k1 = c.getCollationKey(tests[i]);
952                 CollationKey k2 = c.getCollationKey(tests[i+2]);
953 
954                 result = k1.compareTo(k2);
955                 if (sign(result) != sign(expect)) {
956                     fail( i/3 + ": key(" + TestUtils.prettify(tests[i])
957                                         + ").compareTo(key(" + TestUtils.prettify(tests[i+2])
958                                         + ")) got " + result + "; expected " + expect);
959 
960                     fail("  " + TestUtils.prettifyCKey(k1) + " vs. " + TestUtils.prettifyCKey(k2));
961                 }
962             }
963         }
964     }
965 
966     private static final int sign(int i) {
967         if (i < 0) return -1;
968         if (i > 0) return 1;
969         return 0;
970     }
971 
972 
973     static RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);  // collator for Locale.US, cast to RuleBasedCollator; static, so shared by all instances
974 
975     String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";  // sample sentence fixture; not referenced in the code visible in this part of the file
976     String test2 = "Xf ile What subset of all possible test cases has the lowest probability of detecting the least errors?";  // variant of test1 with different case/spacing in the first word and different wording
977     String test3 = "a\u00FCbeck Gr\u00F6\u00DFe L\u00FCbeck";  // sample text containing u-umlaut, o-umlaut and sharp s
978 }
< prev index next >