1 /*
  2  * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  */
 23 
 24 /**
 25  * @test
 26  * @bug 4048446 4051866 4053636 4054238 4054734 4054736 4058613 4059820 4060154
 27  *      4062418 4065540 4066189 4066696 4076676 4078588 4079231 4081866 4087241
 28  *      4087243 4092260 4095316 4101940 4103436 4114076 4114077 4124632 4132736
 29  *      4133509 4139572 4141640 4179126 4179686 4244884 4663220
 30  * @library /java/text/testlib
 31  * @summary Regression tests for Collation and associated classes
 32  * @modules jdk.localedata
 33  * @run junit Regression
 34  */
 35 /*
 36 (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
 37 (C) Copyright IBM Corp. 1996 - All Rights Reserved
 38 
 39   The original version of this source code and documentation is copyrighted and
 40 owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are
 41 provided under terms of a License Agreement between Taligent and Sun. This
 42 technology is protected by multiple US and International patents. This notice and
 43 attribution to Taligent may not be removed.
 44   Taligent is a registered trademark of Taligent, Inc.
 45 */
 46 
 47 import java.text.*;
 48 import java.util.Locale;
 49 import java.util.Vector;
 50 
 51 import org.junit.jupiter.api.Test;
 52 
 53 import static org.junit.jupiter.api.Assertions.fail;
 54 
 55 
 56 public class Regression {
 57 
 58     // CollationElementIterator.reset() doesn't work
 59     //
 60     @Test
 61     public void Test4048446() {
 62         CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
 63         CollationElementIterator i2 = en_us.getCollationElementIterator(test1);
 64 
 65         while ( i1.next() != CollationElementIterator.NULLORDER ) {
 66         }
 67         i1.reset();
 68 
 69         TestUtils.compareCollationElementIters(i1, i2);
 70     }
 71 
 72 
 73     // Collator -> rules -> Collator round-trip broken for expanding characters
 74     //
 75     @Test
 76     public void Test4051866() throws ParseException {
 77         // Build a collator containing expanding characters
 78         RuleBasedCollator c1 = new RuleBasedCollator("< o "
 79                                                     +"& oe ,o\u3080"
 80                                                     +"& oe ,\u1530 ,O"
 81                                                     +"& OE ,O\u3080"
 82                                                     +"& OE ,\u1520"
 83                                                     +"< p ,P");
 84 
 85         // Build another using the rules from  the first
 86         RuleBasedCollator c2 = new RuleBasedCollator(c1.getRules());
 87 
 88         // Make sure they're the same
 89         if (!c1.getRules().equals(c2.getRules())) {
 90             fail("Rules are not equal");
 91         }
 92     }
 93 
 94     // Collator thinks "black-bird" == "black"
 95     //
 96     @Test
 97     public void Test4053636() {
 98         if (en_us.equals("black-bird","black")) {
 99             fail("black-bird == black");
100         }
101     }
102 
103 
104     // CollationElementIterator will not work correctly if the associated
105     // Collator object's mode is changed
106     //
107     @Test
108     public void Test4054238() {
109         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
110 
111         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
112         CollationElementIterator i1 = en_us.getCollationElementIterator(test3);
113 
114         c.setDecomposition(Collator.NO_DECOMPOSITION);
115         CollationElementIterator i2 = en_us.getCollationElementIterator(test3);
116 
117         // At this point, BOTH iterators should use NO_DECOMPOSITION, since the
118         // collator itself is in that mode
119         TestUtils.compareCollationElementIters(i1, i2);
120     }
121 
122     // Collator.IDENTICAL documented but not implemented
123     //
124     @Test
125     public void Test4054734() {
126         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
127         try {
128             c.setStrength(Collator.IDENTICAL);
129         }
130         catch (Exception e) {
131             fail("Caught " + e.toString() + " setting Collator.IDENTICAL");
132         }
133 
134         String[] decomp = {
135             "\u0001",   "<",    "\u0002",
136             "\u0001",   "=",    "\u0001",
137             "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
138             "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
139         };
140         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
141         compareArray(c, decomp);
142 
143         String[] nodecomp = {
144             "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
145         };
146         c.setDecomposition(Collator.NO_DECOMPOSITION);
147         compareArray(c, nodecomp);
148     }
149 
150     // Full Decomposition mode not implemented
151     //
152     @Test
153     public void Test4054736() {
154         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
155         c.setDecomposition(Collator.FULL_DECOMPOSITION);
156 
157         String[] tests = {
158             "\uFB4f", "=", "\u05D0\u05DC",  // Alef-Lamed vs. Alef, Lamed
159         };
160 
161         compareArray(c, tests);
162     }
163 
164     // Collator.getInstance() causes an ArrayIndexOutofBoundsException for Korean
165     //
166     @Test
167     public void Test4058613() {
168         // Creating a default collator doesn't work when Korean is the default
169         // locale
170 
171         Locale oldDefault = Locale.getDefault();
172 
173         Locale.setDefault( Locale.KOREAN );
174         try {
175             Collator c = Collator.getInstance();
176 
177             // Since the fix to this bug was to turn of decomposition for Korean collators,
178             // ensure that's what we got
179             if (c.getDecomposition() != Collator.NO_DECOMPOSITION) {
180               fail("Decomposition is not set to NO_DECOMPOSITION");
181             }
182         }
183         finally {
184             Locale.setDefault(oldDefault);
185         }
186     }
187 
188     // RuleBasedCollator.getRules does not return the exact pattern as input
189     // for expanding character sequences
190     //
191     @Test
192     public void Test4059820() {
193         RuleBasedCollator c = null;
194         try {
195             c = new RuleBasedCollator("< a < b , c/a < d < z");
196         } catch (ParseException e) {
197             fail("Exception building collator: " + e.toString());
198             return;
199         }
200         if ( c.getRules().indexOf("c/a") == -1) {
201             fail("returned rules do not contain 'c/a'");
202         }
203     }
204 
205     // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
206     //
207     @Test
208     public void Test4060154() {
209         RuleBasedCollator c = null;
210         try {
211             c = new RuleBasedCollator("< g, G < h, H < i, I < j, J"
212                                       + " & H < \u0131, \u0130, i, I" );
213         } catch (ParseException e) {
214             fail("Exception building collator: " + e.toString());
215             return;
216         }
217         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
218 
219         String[] tertiary = {
220             "A",        "<",    "B",
221             "H",        "<",    "\u0131",
222             "H",        "<",    "I",
223             "\u0131",   "<",    "\u0130",
224             "\u0130",   "<",    "i",
225             "\u0130",   ">",    "H",
226         };
227         c.setStrength(Collator.TERTIARY);
228         compareArray(c, tertiary);
229 
230         String[] secondary = {
231             "H",        "<",    "I",
232             "\u0131",   "=",    "\u0130",
233         };
234         c.setStrength(Collator.PRIMARY);
235         compareArray(c, secondary);
236     };
237 
238     // Secondary/Tertiary comparison incorrect in French Secondary
239     //
240     @Test
241     public void Test4062418() throws ParseException {
242         RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
243         c.setStrength(Collator.SECONDARY);
244 
245         String[] tests = {
246                 "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
247         };
248 
249         compareArray(c, tests);
250     }
251 
252     // Collator.compare() method broken if either string contains spaces
253     //
254     @Test
255     public void Test4065540() {
256         if (en_us.compare("abcd e", "abcd f") == 0) {
257             fail("'abcd e' == 'abcd f'");
258         }
259     }
260 
261     // Unicode characters need to be recursively decomposed to get the
262     // correct result. For example,
263     // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
264     //
265     @Test
266     public void Test4066189() {
267         String test1 = "\u1EB1";
268         String test2 = "a\u0306\u0300";
269 
270         RuleBasedCollator c1 = (RuleBasedCollator) en_us.clone();
271         c1.setDecomposition(Collator.FULL_DECOMPOSITION);
272         CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
273 
274         RuleBasedCollator c2 = (RuleBasedCollator) en_us.clone();
275         c2.setDecomposition(Collator.NO_DECOMPOSITION);
276         CollationElementIterator i2 = en_us.getCollationElementIterator(test2);
277 
278         TestUtils.compareCollationElementIters(i1, i2);
279     }
280 
281     // French secondary collation checking at the end of compare iteration fails
282     //
283     @Test
284     public void Test4066696() {
285         RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
286         c.setStrength(Collator.SECONDARY);
287 
288         String[] tests = {
289             "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
290         };
291 
292         compareArray(c, tests);
293     }
294 
295 
296     // Bad canonicalization of same-class combining characters
297     //
298     @Test
299     public void Test4076676() {
300         // These combining characters are all in the same class, so they should not
301         // be reordered, and they should compare as unequal.
302         String s1 = "A\u0301\u0302\u0300";
303         String s2 = "A\u0302\u0300\u0301";
304 
305         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
306         c.setStrength(Collator.TERTIARY);
307 
308         if (c.compare(s1,s2) == 0) {
309             fail("Same-class combining chars were reordered");
310         }
311     }
312 
313 
314     // RuleBasedCollator.equals(null) throws NullPointerException
315     //
316     @Test
317     public void Test4079231() {
318         try {
319             if (en_us.equals(null)) {
320                 fail("en_us.equals(null) returned true");
321             }
322         }
323         catch (Exception e) {
324             fail("en_us.equals(null) threw " + e.toString());
325         }
326     }
327 
328     // RuleBasedCollator breaks on "< a < bb" rule
329     //
330     @Test
331     public void Test4078588() throws ParseException {
332         RuleBasedCollator rbc=new RuleBasedCollator("< a < bb");
333 
334         int result = rbc.compare("a","bb");
335 
336         if (result != -1) {
337             fail("Compare(a,bb) returned " + result + "; expected -1");
338         }
339     }
340 
341     // Combining characters in different classes not reordered properly.
342     //
343     @Test
344     public void Test4081866() throws ParseException {
345         // These combining characters are all in different classes,
346         // so they should be reordered and the strings should compare as equal.
347         String s1 = "A\u0300\u0316\u0327\u0315";
348         String s2 = "A\u0327\u0316\u0315\u0300";
349 
350         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
351         c.setStrength(Collator.TERTIARY);
352 
353         // Now that the default collators are set to NO_DECOMPOSITION
354         // (as a result of fixing bug 4114077), we must set it explicitly
355         // when we're testing reordering behavior.  -- lwerner, 5/5/98
356         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
357 
358         if (c.compare(s1,s2) != 0) {
359             fail("Combining chars were not reordered");
360         }
361     }
362 
363     // string comparison errors in Scandinavian collators
364     //
365     @Test
366     public void Test4087241() {
367         RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(
368                                                         Locale.of("da", "DK"));
369         c.setStrength(Collator.SECONDARY);
370 
371         String[] tests = {
372             "\u007a",   "<",    "\u00e6",       // z        < ae
373             "a\u0308",  "<",    "a\u030a",      // a-unlaut < a-ring
374             "Y",        "<",    "u\u0308",      // Y        < u-umlaut
375         };
376 
377         compareArray(c, tests);
378     }
379 
380     // CollationKey takes ignorable strings into account when it shouldn't
381     //
382     @Test
383     public void Test4087243() {
384         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
385         c.setStrength(Collator.TERTIARY);
386 
387         String[] tests = {
388             "123",      "=",    "123\u0001",        // 1 2 3  =  1 2 3 ctrl-A
389         };
390 
391         compareArray(c, tests);
392     }
393 
394     // Mu/micro conflict
395     // Micro symbol and greek lowercase letter Mu should sort identically
396     //
397     @Test
398     public void Test4092260() {
399         Collator c = Collator.getInstance(Locale.of("el"));
400 
401         // will only be equal when FULL_DECOMPOSITION is used
402         c.setDecomposition(Collator.FULL_DECOMPOSITION);
403 
404         String[] tests = {
405             "\u00B5",      "=",    "\u03BC",
406         };
407 
408         compareArray(c, tests);
409     }
410 
411     void Test4095316() {
412         Collator c = Collator.getInstance(Locale.of("el", "GR"));
413         c.setStrength(Collator.TERTIARY);
414         // javadocs for RuleBasedCollator clearly specify that characters containing compatability
415         // chars MUST use FULL_DECOMPOSITION to get accurate comparisons.
416         c.setDecomposition(Collator.FULL_DECOMPOSITION);
417 
418         String[] tests = {
419             "\u03D4",      "=",    "\u03AB",
420         };
421 
422         compareArray(c, tests);
423     }
424 
425     @Test
426     public void Test4101940() {
427         try {
428             RuleBasedCollator c = new RuleBasedCollator("< a < b");
429             CollationElementIterator i = c.getCollationElementIterator("");
430             i.reset();
431 
432             if (i.next() != i.NULLORDER) {
433                 fail("next did not return NULLORDER");
434             }
435         }
436         catch (Exception e) {
437             fail("Caught " + e );
438         }
439     }
440 
441     // Collator.compare not handling spaces properly
442     //
443     @Test
444     public void Test4103436() {
445         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
446         c.setStrength(Collator.TERTIARY);
447 
448         String[] tests = {
449             "file",      "<",    "file access",
450             "file",      "<",    "fileaccess",
451         };
452 
453         compareArray(c, tests);
454     }
455 
456     // Collation not Unicode conformant with Hangul syllables
457     //
458     @Test
459     public void Test4114076() {
460         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
461         c.setStrength(Collator.TERTIARY);
462 
463         //
464         // With Canonical decomposition, Hangul syllables should get decomposed
465         // into Jamo, but Jamo characters should not be decomposed into
466         // conjoining Jamo
467         //
468         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
469         String[] test1 = {
470             "\ud4db",   "=",    "\u1111\u1171\u11b6",
471         };
472         compareArray(c, test1);
473 
474         // Full decomposition result should be the same as canonical decomposition
475         // for all hangul.
476         c.setDecomposition(Collator.FULL_DECOMPOSITION);
477         compareArray(c, test1);
478 
479     }
480 
481 
482     // Collator.getCollationKey was hanging on certain character sequences
483     //
484     @Test
485     public void Test4124632() throws Exception {
486         Collator coll = Collator.getInstance(Locale.JAPAN);
487 
488         try {
489             coll.getCollationKey("A\u0308bc");
490         } catch (OutOfMemoryError e) {
491             fail("Ran out of memory -- probably an infinite loop");
492         }
493     }
494 
495     // sort order of french words with multiple accents has errors
496     //
497     @Test
498     public void Test4132736() {
499         Collator c = Collator.getInstance(Locale.FRANCE);
500 
501         String[] test1 = {
502             "e\u0300e\u0301",   "<",    "e\u0301e\u0300",
503             "e\u0300\u0301",    ">",    "e\u0301\u0300",
504         };
505         compareArray(c, test1);
506     }
507 
508     // The sorting using java.text.CollationKey is not in the exact order
509     //
510     @Test
511     public void Test4133509() {
512         String[] test1 = {
513             "Exception",    "<",    "ExceptionInInitializerError",
514             "Graphics",     "<",    "GraphicsEnvironment",
515             "String",       "<",    "StringBuffer",
516         };
517         compareArray(en_us, test1);
518     }
519 
520     // Collation with decomposition off doesn't work for Europe
521     //
522     @Test
523     public void Test4114077() {
524         // Ensure that we get the same results with decomposition off
525         // as we do with it on....
526 
527         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
528         c.setStrength(Collator.TERTIARY);
529 
530         String[] test1 = {
531             "\u00C0",        "=", "A\u0300",        // Should be equivalent
532             "p\u00eache",         ">", "p\u00e9ch\u00e9",
533             "\u0204",        "=", "E\u030F",
534             "\u01fa",        "=", "A\u030a\u0301",  // a-ring-acute -> a-ring, acute
535                                                     //   -> a, ring, acute
536             "A\u0300\u0316", "<", "A\u0316\u0300",  // No reordering --> unequal
537         };
538         c.setDecomposition(Collator.NO_DECOMPOSITION);
539         compareArray(c, test1);
540 
541         String[] test2 = {
542             "A\u0300\u0316", "=", "A\u0316\u0300",      // Reordering --> equal
543         };
544         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
545         compareArray(c, test2);
546     }
547 
548     // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
549     //
550     @Test
551     public void Test4141640() {
552         //
553         // Rather than just creating a Swedish collator, we might as well
554         // try to instantiate one for every locale available on the system
555         // in order to prevent this sort of bug from cropping up in the future
556         //
557         Locale[] locales = Collator.getAvailableLocales();
558 
559         for (int i = 0; i < locales.length; i++) {
560             try {
561                 Collator c = Collator.getInstance(locales[i]);
562             } catch (Exception e) {
563                 fail("Caught " + e + " creating collator for " + locales[i]);
564             }
565         }
566     }
567 
568     // getCollationKey throws exception for spanish text
569     // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
570     //
571     @Test
572     public void Test4139572() {
573         //
574         // Code pasted straight from the bug report
575         //
576         // create spanish locale and collator
577         Locale l = Locale.of("es", "es");
578         Collator col = Collator.getInstance(l);
579 
580         // this spanish phrase kills it!
581         col.getCollationKey("Nombre De Objeto");
582     }
583 
584     // RuleBasedCollator doesn't use getCollationElementIterator internally
585     //
586     @Test
587     public void Test4146160() throws ParseException {
588         //
589         // Use a custom collator class whose getCollationElementIterator
590         // methods increment a count....
591         //
592         My4146160Collator.count = 0;
593         new My4146160Collator().getCollationKey("1");
594         if (My4146160Collator.count < 1) {
595             fail("getCollationElementIterator not called");
596         }
597 
598         My4146160Collator.count = 0;
599         new My4146160Collator().compare("1", "2");
600         if (My4146160Collator.count < 1) {
601             fail("getCollationElementIterator not called");
602         }
603     }
604 
605     static class My4146160Collator extends RuleBasedCollator {
606         public My4146160Collator() throws ParseException {
607             super(Regression.en_us.getRules());
608         }
609 
610         public CollationElementIterator getCollationElementIterator(
611                                             String text) {
612             count++;
613             return super.getCollationElementIterator(text);
614         }
615         public CollationElementIterator getCollationElementIterator(
616                                             CharacterIterator text) {
617             count++;
618             return super.getCollationElementIterator(text);
619         }
620 
621         public static int count = 0;
622     };
623 
624     // CollationElementIterator.previous broken for expanding char sequences
625     //
626     @Test
627     public void Test4179686() throws ParseException {
628 
629         // Create a collator with a few expanding character sequences in it....
630         RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules()
631                                                     + " & ae ; \u00e4 & AE ; \u00c4"
632                                                     + " & oe ; \u00f6 & OE ; \u00d6"
633                                                     + " & ue ; \u00fc & UE ; \u00dc");
634 
635         String text = "T\u00f6ne"; // o-umlaut
636 
637         CollationElementIterator iter = coll.getCollationElementIterator(text);
638         Vector elements = new Vector();
639         int elem;
640 
641         // Iterate forward and collect all of the elements into a Vector
642         while ((elem = iter.next()) != iter.NULLORDER) {
643             elements.addElement(new Integer(elem));
644         }
645 
646         // Now iterate backward and make sure they're the same
647         int index = elements.size() - 1;
648         while ((elem = iter.previous()) != iter.NULLORDER) {
649             int expect = ((Integer)elements.elementAt(index)).intValue();
650 
651             if (elem != expect) {
652                 fail("Mismatch at index " + index
653                       + ": got " + Integer.toString(elem,16)
654                       + ", expected " + Integer.toString(expect,16));
655             }
656             index--;
657         }
658     }
659 
660     @Test
661     public void Test4244884() throws ParseException {
662         RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
663         coll = new RuleBasedCollator(coll.getRules()
664                 + " & C < ch , cH , Ch , CH < cat < crunchy");
665 
666         String[] testStrings = new String[] {
667             "car",
668             "cave",
669             "clamp",
670             "cramp",
671             "czar",
672             "church",
673             "catalogue",
674             "crunchy",
675             "dog"
676         };
677 
678         for (int i = 1; i < testStrings.length; i++) {
679             if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) {
680                 fail("error: \"" + testStrings[i - 1]
681                     + "\" is greater than or equal to \"" + testStrings[i]
682                     + "\".");
683             }
684         }
685     }
686 
687     @Test
688     public void Test4179216() throws ParseException {
689         // you can position a CollationElementIterator in the middle of
690         // a contracting character sequence, yielding a bogus collation
691         // element
692         RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
693         coll = new RuleBasedCollator(coll.getRules()
694                 + " & C < ch , cH , Ch , CH < cat < crunchy");
695         String testText = "church church catcatcher runcrunchynchy";
696         CollationElementIterator iter = coll.getCollationElementIterator(
697                 testText);
698 
699         // test that the "ch" combination works properly
700         iter.setOffset(4);
701         int elt4 = CollationElementIterator.primaryOrder(iter.next());
702 
703         iter.reset();
704         int elt0 = CollationElementIterator.primaryOrder(iter.next());
705 
706         iter.setOffset(5);
707         int elt5 = CollationElementIterator.primaryOrder(iter.next());
708 
709         if (elt4 != elt0 || elt5 != elt0)
710             fail("The collation elements at positions 0 (" + elt0 + "), 4 ("
711                     + elt4 + "), and 5 (" + elt5 + ") don't match.");
712 
713         // test that the "cat" combination works properly
714         iter.setOffset(14);
715         int elt14 = CollationElementIterator.primaryOrder(iter.next());
716 
717         iter.setOffset(15);
718         int elt15 = CollationElementIterator.primaryOrder(iter.next());
719 
720         iter.setOffset(16);
721         int elt16 = CollationElementIterator.primaryOrder(iter.next());
722 
723         iter.setOffset(17);
724         int elt17 = CollationElementIterator.primaryOrder(iter.next());
725 
726         iter.setOffset(18);
727         int elt18 = CollationElementIterator.primaryOrder(iter.next());
728 
729         iter.setOffset(19);
730         int elt19 = CollationElementIterator.primaryOrder(iter.next());
731 
732         if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
733                 || elt14 != elt18 || elt14 != elt19)
734             fail("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = "
735             + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17
736             + ", elt18 = " + elt18 + ", elt19 = " + elt19);
737 
738         // now generate a complete list of the collation elements,
739         // first using next() and then using setOffset(), and
740         // make sure both interfaces return the same set of elements
741         iter.reset();
742 
743         int elt = iter.next();
744         int count = 0;
745         while (elt != CollationElementIterator.NULLORDER) {
746             ++count;
747             elt = iter.next();
748         }
749 
750         String[] nextElements = new String[count];
751         String[] setOffsetElements = new String[count];
752         int lastPos = 0;
753 
754         iter.reset();
755         elt = iter.next();
756         count = 0;
757         while (elt != CollationElementIterator.NULLORDER) {
758             nextElements[count++] = testText.substring(lastPos, iter.getOffset());
759             lastPos = iter.getOffset();
760             elt = iter.next();
761         }
762         count = 0;
763         for (int i = 0; i < testText.length(); ) {
764             iter.setOffset(i);
765             lastPos = iter.getOffset();
766             elt = iter.next();
767             setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset());
768             i = iter.getOffset();
769         }
770         for (int i = 0; i < nextElements.length; i++) {
771             if (nextElements[i].equals(setOffsetElements[i])) {
772                 System.out.println(nextElements[i]);
773             } else {
774                 fail("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded "
775                     + setOffsetElements[i]);
776             }
777         }
778     }
779 
780     @Test
781     public void Test4216006() throws Exception {
782         // rule parser barfs on "<\u00e0=a\u0300", and on other cases
783         // where the same token (after normalization) appears twice in a row
784         boolean caughtException = false;
785         try {
786             RuleBasedCollator dummy = new RuleBasedCollator("\u00e0<a\u0300");
787         }
788         catch (ParseException e) {
789             caughtException = true;
790         }
791         if (!caughtException) {
792             throw new Exception("\"a<a\" collation sequence didn't cause parse error!");
793         }
794 
795         RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300");
796         collator.setDecomposition(Collator.FULL_DECOMPOSITION);
797         collator.setStrength(Collator.IDENTICAL);
798 
799         String[] tests = {
800             "a\u0300", "=", "\u00e0",
801             "\u00e0",  "=", "a\u0300"
802         };
803 
804         compareArray(collator, tests);
805     }
806 
807     @Test
808     public void Test4171974() {
809         // test French accent ordering more thoroughly
810         String[] frenchList = {
811             "\u0075\u0075",     // u u
812             "\u00fc\u0075",     // u-umlaut u
813             "\u01d6\u0075",     // u-umlaut-macron u
814             "\u016b\u0075",     // u-macron u
815             "\u1e7b\u0075",     // u-macron-umlaut u
816             "\u0075\u00fc",     // u u-umlaut
817             "\u00fc\u00fc",     // u-umlaut u-umlaut
818             "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
819             "\u016b\u00fc",     // u-macron u-umlaut
820             "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
821             "\u0075\u01d6",     // u u-umlaut-macron
822             "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
823             "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
824             "\u016b\u01d6",     // u-macron u-umlaut-macron
825             "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
826             "\u0075\u016b",     // u u-macron
827             "\u00fc\u016b",     // u-umlaut u-macron
828             "\u01d6\u016b",     // u-umlaut-macron u-macron
829             "\u016b\u016b",     // u-macron u-macron
830             "\u1e7b\u016b",     // u-macron-umlaut u-macron
831             "\u0075\u1e7b",     // u u-macron-umlaut
832             "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
833             "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
834             "\u016b\u1e7b",     // u-macron u-macron-umlaut
835             "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
836         };
837         Collator french = Collator.getInstance(Locale.FRENCH);
838 
839         System.out.println("Testing French order...");
840         checkListOrder(frenchList, french);
841 
842         System.out.println("Testing French order without decomposition...");
843         french.setDecomposition(Collator.NO_DECOMPOSITION);
844         checkListOrder(frenchList, french);
845 
846         String[] englishList = {
847             "\u0075\u0075",     // u u
848             "\u0075\u00fc",     // u u-umlaut
849             "\u0075\u01d6",     // u u-umlaut-macron
850             "\u0075\u016b",     // u u-macron
851             "\u0075\u1e7b",     // u u-macron-umlaut
852             "\u00fc\u0075",     // u-umlaut u
853             "\u00fc\u00fc",     // u-umlaut u-umlaut
854             "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
855             "\u00fc\u016b",     // u-umlaut u-macron
856             "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
857             "\u01d6\u0075",     // u-umlaut-macron u
858             "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
859             "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
860             "\u01d6\u016b",     // u-umlaut-macron u-macron
861             "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
862             "\u016b\u0075",     // u-macron u
863             "\u016b\u00fc",     // u-macron u-umlaut
864             "\u016b\u01d6",     // u-macron u-umlaut-macron
865             "\u016b\u016b",     // u-macron u-macron
866             "\u016b\u1e7b",     // u-macron u-macron-umlaut
867             "\u1e7b\u0075",     // u-macron-umlaut u
868             "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
869             "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
870             "\u1e7b\u016b",     // u-macron-umlaut u-macron
871             "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
872         };
873         Collator english = Collator.getInstance(Locale.ENGLISH);
874 
875         System.out.println("Testing English order...");
876         checkListOrder(englishList, english);
877 
878         System.out.println("Testing English order without decomposition...");
879         english.setDecomposition(Collator.NO_DECOMPOSITION);
880         checkListOrder(englishList, english);
881     }
882 
883     private void checkListOrder(String[] sortedList, Collator c) {
884         // this function uses the specified Collator to make sure the
885         // passed-in list is already sorted into ascending order
886         for (int i = 0; i < sortedList.length - 1; i++) {
887             if (c.compare(sortedList[i], sortedList[i + 1]) >= 0) {
888                 fail("List out of order at element #" + i + ": "
889                         + TestUtils.prettify(sortedList[i]) + " >= "
890                         + TestUtils.prettify(sortedList[i + 1]));
891             }
892         }
893     }
894 
    // CollationElementIterator set doesn't work properly with next/prev
896     @Test
897     public void Test4663220() {
898         RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US);
899         CharacterIterator stringIter = new StringCharacterIterator("fox");
900         CollationElementIterator iter = collator.getCollationElementIterator(stringIter);
901 
902         int[] elements_next = new int[3];
903         System.out.println("calling next:");
904         for (int i = 0; i < 3; ++i) {
905             System.out.println("[" + i + "] " + (elements_next[i] = iter.next()));
906         }
907 
908         int[] elements_fwd = new int[3];
909         System.out.println("calling set/next:");
910         for (int i = 0; i < 3; ++i) {
911             iter.setOffset(i);
912             System.out.println("[" + i + "] " + (elements_fwd[i] = iter.next()));
913         }
914 
915         for (int i = 0; i < 3; ++i) {
916             if (elements_next[i] != elements_fwd[i]) {
917                 fail("mismatch at position " + i +
918                       ": " + elements_next[i] +
919                       " != " + elements_fwd[i]);
920             }
921         }
922     }
923 
924     //------------------------------------------------------------------------
925     // Internal utilities
926     //
927     private void compareArray(Collator c, String[] tests) {
928         for (int i = 0; i < tests.length; i += 3) {
929 
930             int expect = 0;
931             if (tests[i+1].equals("<")) {
932                 expect = -1;
933             } else if (tests[i+1].equals(">")) {
934                 expect = 1;
935             } else if (tests[i+1].equals("=")) {
936                 expect = 0;
937             } else {
938                 expect = Integer.decode(tests[i+1]).intValue();
939             }
940 
941             int result = c.compare(tests[i], tests[i+2]);
942             if (sign(result) != sign(expect))
943             {
944                 fail( i/3 + ": compare(" + TestUtils.prettify(tests[i])
945                                     + " , " + TestUtils.prettify(tests[i+2])
946                                     + ") got " + result + "; expected " + expect);
947             }
948             else
949             {
950                 // Collator.compare worked OK; now try the collation keys
951                 CollationKey k1 = c.getCollationKey(tests[i]);
952                 CollationKey k2 = c.getCollationKey(tests[i+2]);
953 
954                 result = k1.compareTo(k2);
955                 if (sign(result) != sign(expect)) {
956                     fail( i/3 + ": key(" + TestUtils.prettify(tests[i])
957                                         + ").compareTo(key(" + TestUtils.prettify(tests[i+2])
958                                         + ")) got " + result + "; expected " + expect);
959 
960                     fail("  " + TestUtils.prettifyCKey(k1) + " vs. " + TestUtils.prettifyCKey(k2));
961                 }
962             }
963         }
964     }
965 
966     private static final int sign(int i) {
967         if (i < 0) return -1;
968         if (i > 0) return 1;
969         return 0;
970     }
971 
972 
    // Collator for Locale.US, created once when the class is initialized and
    // held in a static field.  The cast assumes Collator.getInstance returns a
    // RuleBasedCollator for this locale.
    // NOTE(review): not referenced in this part of the file; presumably used
    // by tests defined earlier -- confirm before changing.
    static RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);

    // Sample strings held as instance fields.
    // NOTE(review): their users are not visible in this part of the file.
    // test1 and test2 are two long sentences that differ in their leading
    // word ("XFILE" vs. "Xf ile") and in "highest"/"lowest" and "most"/"least".
    String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
    String test2 = "Xf ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
    // test3 contains non-ASCII letters: U+00FC (u-umlaut), U+00F6 (o-umlaut)
    // and U+00DF (sharp s).
    String test3 = "a\u00FCbeck Gr\u00F6\u00DFe L\u00FCbeck";
978 }