1 /*
  2  * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  */
 23 
 24 /**
 25  * @test
 26  * @bug 4048446 4051866 4053636 4054238 4054734 4054736 4058613 4059820 4060154
 27  *      4062418 4065540 4066189 4066696 4076676 4078588 4079231 4081866 4087241
 28  *      4087243 4092260 4095316 4101940 4103436 4114076 4114077 4124632 4132736
 29  *      4133509 4139572 4141640 4179126 4179686 4244884 4663220
 30  * @library /java/text/testlib
 31  * @summary Regression tests for Collation and associated classes
 32  * @modules jdk.localedata
 33  */
 34 /*
 35 (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
 36 (C) Copyright IBM Corp. 1996 - All Rights Reserved
 37 
 38   The original version of this source code and documentation is copyrighted and
 39 owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are
 40 provided under terms of a License Agreement between Taligent and Sun. This
 41 technology is protected by multiple US and International patents. This notice and
 42 attribution to Taligent may not be removed.
 43   Taligent is a registered trademark of Taligent, Inc.
 44 */
 45 
 46 import java.text.*;
 47 import java.util.Locale;
 48 import java.util.Vector;
 49 
 50 
 51 public class Regression extends CollatorTest {
 52 
 53     public static void main(String[] args) throws Exception {
 54         new Regression().run(args);
 55     }
 56 
 57     // CollationElementIterator.reset() doesn't work
 58     //
 59     public void Test4048446() {
 60         CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
 61         CollationElementIterator i2 = en_us.getCollationElementIterator(test1);
 62 
 63         while ( i1.next() != CollationElementIterator.NULLORDER ) {
 64         }
 65         i1.reset();
 66 
 67         assertEqual(i1, i2);
 68     }
 69 
 70 
 71     // Collator -> rules -> Collator round-trip broken for expanding characters
 72     //
 73     public void Test4051866() throws ParseException {
 74         // Build a collator containing expanding characters
 75         RuleBasedCollator c1 = new RuleBasedCollator("< o "
 76                                                     +"& oe ,o\u3080"
 77                                                     +"& oe ,\u1530 ,O"
 78                                                     +"& OE ,O\u3080"
 79                                                     +"& OE ,\u1520"
 80                                                     +"< p ,P");
 81 
 82         // Build another using the rules from  the first
 83         RuleBasedCollator c2 = new RuleBasedCollator(c1.getRules());
 84 
 85         // Make sure they're the same
 86         if (!c1.getRules().equals(c2.getRules())) {
 87             errln("Rules are not equal");
 88         }
 89     }
 90 
 91     // Collator thinks "black-bird" == "black"
 92     //
 93     public void Test4053636() {
 94         if (en_us.equals("black-bird","black")) {
 95             errln("black-bird == black");
 96         }
 97     }
 98 
 99 
100     // CollationElementIterator will not work correctly if the associated
101     // Collator object's mode is changed
102     //
103     public void Test4054238() {
104         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
105 
106         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
107         CollationElementIterator i1 = en_us.getCollationElementIterator(test3);
108 
109         c.setDecomposition(Collator.NO_DECOMPOSITION);
110         CollationElementIterator i2 = en_us.getCollationElementIterator(test3);
111 
112         // At this point, BOTH iterators should use NO_DECOMPOSITION, since the
113         // collator itself is in that mode
114         assertEqual(i1, i2);
115     }
116 
117     // Collator.IDENTICAL documented but not implemented
118     //
119     public void Test4054734() {
120         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
121         try {
122             c.setStrength(Collator.IDENTICAL);
123         }
124         catch (Exception e) {
125             errln("Caught " + e.toString() + " setting Collator.IDENTICAL");
126         }
127 
128         String[] decomp = {
129             "\u0001",   "<",    "\u0002",
130             "\u0001",   "=",    "\u0001",
131             "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
132             "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
133         };
134         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
135         compareArray(c, decomp);
136 
137         String[] nodecomp = {
138             "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
139         };
140         c.setDecomposition(Collator.NO_DECOMPOSITION);
141         compareArray(c, nodecomp);
142     }
143 
144     // Full Decomposition mode not implemented
145     //
146     public void Test4054736() {
147         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
148         c.setDecomposition(Collator.FULL_DECOMPOSITION);
149 
150         String[] tests = {
151             "\uFB4f", "=", "\u05D0\u05DC",  // Alef-Lamed vs. Alef, Lamed
152         };
153 
154         compareArray(c, tests);
155     }
156 
    // Collator.getInstance() causes an ArrayIndexOutOfBoundsException for Korean
158     //
159     public void Test4058613() {
160         // Creating a default collator doesn't work when Korean is the default
161         // locale
162 
163         Locale oldDefault = Locale.getDefault();
164 
165         Locale.setDefault( Locale.KOREAN );
166         try {
167             Collator c = Collator.getInstance();
168 
169             // Since the fix to this bug was to turn of decomposition for Korean collators,
170             // ensure that's what we got
171             if (c.getDecomposition() != Collator.NO_DECOMPOSITION) {
172               errln("Decomposition is not set to NO_DECOMPOSITION");
173             }
174         }
175         finally {
176             Locale.setDefault(oldDefault);
177         }
178     }
179 
180     // RuleBasedCollator.getRules does not return the exact pattern as input
181     // for expanding character sequences
182     //
183     public void Test4059820() {
184         RuleBasedCollator c = null;
185         try {
186             c = new RuleBasedCollator("< a < b , c/a < d < z");
187         } catch (ParseException e) {
188             errln("Exception building collator: " + e.toString());
189             return;
190         }
191         if ( c.getRules().indexOf("c/a") == -1) {
192             errln("returned rules do not contain 'c/a'");
193         }
194     }
195 
196     // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
197     //
198     public void Test4060154() {
199         RuleBasedCollator c = null;
200         try {
201             c = new RuleBasedCollator("< g, G < h, H < i, I < j, J"
202                                       + " & H < \u0131, \u0130, i, I" );
203         } catch (ParseException e) {
204             errln("Exception building collator: " + e.toString());
205             return;
206         }
207         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
208 
209         String[] tertiary = {
210             "A",        "<",    "B",
211             "H",        "<",    "\u0131",
212             "H",        "<",    "I",
213             "\u0131",   "<",    "\u0130",
214             "\u0130",   "<",    "i",
215             "\u0130",   ">",    "H",
216         };
217         c.setStrength(Collator.TERTIARY);
218         compareArray(c, tertiary);
219 
220         String[] secondary = {
221             "H",        "<",    "I",
222             "\u0131",   "=",    "\u0130",
223         };
224         c.setStrength(Collator.PRIMARY);
225         compareArray(c, secondary);
226     };
227 
228     // Secondary/Tertiary comparison incorrect in French Secondary
229     //
230     public void Test4062418() throws ParseException {
231         RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
232         c.setStrength(Collator.SECONDARY);
233 
234         String[] tests = {
235                 "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
236         };
237 
238         compareArray(c, tests);
239     }
240 
241     // Collator.compare() method broken if either string contains spaces
242     //
243     public void Test4065540() {
244         if (en_us.compare("abcd e", "abcd f") == 0) {
245             errln("'abcd e' == 'abcd f'");
246         }
247     }
248 
249     // Unicode characters need to be recursively decomposed to get the
250     // correct result. For example,
    // \u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
252     //
253     public void Test4066189() {
254         String test1 = "\u1EB1";
255         String test2 = "a\u0306\u0300";
256 
257         RuleBasedCollator c1 = (RuleBasedCollator) en_us.clone();
258         c1.setDecomposition(Collator.FULL_DECOMPOSITION);
259         CollationElementIterator i1 = en_us.getCollationElementIterator(test1);
260 
261         RuleBasedCollator c2 = (RuleBasedCollator) en_us.clone();
262         c2.setDecomposition(Collator.NO_DECOMPOSITION);
263         CollationElementIterator i2 = en_us.getCollationElementIterator(test2);
264 
265         assertEqual(i1, i2);
266     }
267 
268     // French secondary collation checking at the end of compare iteration fails
269     //
270     public void Test4066696() {
271         RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
272         c.setStrength(Collator.SECONDARY);
273 
274         String[] tests = {
275             "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
276         };
277 
278         compareArray(c, tests);
279     }
280 
281 
282     // Bad canonicalization of same-class combining characters
283     //
284     public void Test4076676() {
285         // These combining characters are all in the same class, so they should not
286         // be reordered, and they should compare as unequal.
287         String s1 = "A\u0301\u0302\u0300";
288         String s2 = "A\u0302\u0300\u0301";
289 
290         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
291         c.setStrength(Collator.TERTIARY);
292 
293         if (c.compare(s1,s2) == 0) {
294             errln("Same-class combining chars were reordered");
295         }
296     }
297 
298 
299     // RuleBasedCollator.equals(null) throws NullPointerException
300     //
301     public void Test4079231() {
302         try {
303             if (en_us.equals(null)) {
304                 errln("en_us.equals(null) returned true");
305             }
306         }
307         catch (Exception e) {
308             errln("en_us.equals(null) threw " + e.toString());
309         }
310     }
311 
312     // RuleBasedCollator breaks on "< a < bb" rule
313     //
314     public void Test4078588() throws ParseException {
315         RuleBasedCollator rbc=new RuleBasedCollator("< a < bb");
316 
317         int result = rbc.compare("a","bb");
318 
319         if (result != -1) {
320             errln("Compare(a,bb) returned " + result + "; expected -1");
321         }
322     }
323 
324     // Combining characters in different classes not reordered properly.
325     //
326     public void Test4081866() throws ParseException {
327         // These combining characters are all in different classes,
328         // so they should be reordered and the strings should compare as equal.
329         String s1 = "A\u0300\u0316\u0327\u0315";
330         String s2 = "A\u0327\u0316\u0315\u0300";
331 
332         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
333         c.setStrength(Collator.TERTIARY);
334 
335         // Now that the default collators are set to NO_DECOMPOSITION
336         // (as a result of fixing bug 4114077), we must set it explicitly
337         // when we're testing reordering behavior.  -- lwerner, 5/5/98
338         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
339 
340         if (c.compare(s1,s2) != 0) {
341             errln("Combining chars were not reordered");
342         }
343     }
344 
345     // string comparison errors in Scandinavian collators
346     //
347     public void Test4087241() {
348         RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(
349                                                         Locale.of("da", "DK"));
350         c.setStrength(Collator.SECONDARY);
351 
352         String[] tests = {
353             "\u007a",   "<",    "\u00e6",       // z        < ae
354             "a\u0308",  "<",    "a\u030a",      // a-unlaut < a-ring
355             "Y",        "<",    "u\u0308",      // Y        < u-umlaut
356         };
357 
358         compareArray(c, tests);
359     }
360 
361     // CollationKey takes ignorable strings into account when it shouldn't
362     //
363     public void Test4087243() {
364         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
365         c.setStrength(Collator.TERTIARY);
366 
367         String[] tests = {
368             "123",      "=",    "123\u0001",        // 1 2 3  =  1 2 3 ctrl-A
369         };
370 
371         compareArray(c, tests);
372     }
373 
374     // Mu/micro conflict
375     // Micro symbol and greek lowercase letter Mu should sort identically
376     //
377     public void Test4092260() {
378         Collator c = Collator.getInstance(Locale.of("el"));
379 
380         // will only be equal when FULL_DECOMPOSITION is used
381         c.setDecomposition(Collator.FULL_DECOMPOSITION);
382 
383         String[] tests = {
384             "\u00B5",      "=",    "\u03BC",
385         };
386 
387         compareArray(c, tests);
388     }
389 
390     void Test4095316() {
391         Collator c = Collator.getInstance(Locale.of("el", "GR"));
392         c.setStrength(Collator.TERTIARY);
393         // javadocs for RuleBasedCollator clearly specify that characters containing compatability
394         // chars MUST use FULL_DECOMPOSITION to get accurate comparisons.
395         c.setDecomposition(Collator.FULL_DECOMPOSITION);
396 
397         String[] tests = {
398             "\u03D4",      "=",    "\u03AB",
399         };
400 
401         compareArray(c, tests);
402     }
403 
404     public void Test4101940() {
405         try {
406             RuleBasedCollator c = new RuleBasedCollator("< a < b");
407             CollationElementIterator i = c.getCollationElementIterator("");
408             i.reset();
409 
410             if (i.next() != i.NULLORDER) {
411                 errln("next did not return NULLORDER");
412             }
413         }
414         catch (Exception e) {
415             errln("Caught " + e );
416         }
417     }
418 
419     // Collator.compare not handling spaces properly
420     //
421     public void Test4103436() {
422         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
423         c.setStrength(Collator.TERTIARY);
424 
425         String[] tests = {
426             "file",      "<",    "file access",
427             "file",      "<",    "fileaccess",
428         };
429 
430         compareArray(c, tests);
431     }
432 
433     // Collation not Unicode conformant with Hangul syllables
434     //
435     public void Test4114076() {
436         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
437         c.setStrength(Collator.TERTIARY);
438 
439         //
440         // With Canonical decomposition, Hangul syllables should get decomposed
441         // into Jamo, but Jamo characters should not be decomposed into
442         // conjoining Jamo
443         //
444         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
445         String[] test1 = {
446             "\ud4db",   "=",    "\u1111\u1171\u11b6",
447         };
448         compareArray(c, test1);
449 
450         // Full decomposition result should be the same as canonical decomposition
451         // for all hangul.
452         c.setDecomposition(Collator.FULL_DECOMPOSITION);
453         compareArray(c, test1);
454 
455     }
456 
457 
458     // Collator.getCollationKey was hanging on certain character sequences
459     //
460     public void Test4124632() throws Exception {
461         Collator coll = Collator.getInstance(Locale.JAPAN);
462 
463         try {
464             coll.getCollationKey("A\u0308bc");
465         } catch (OutOfMemoryError e) {
466             errln("Ran out of memory -- probably an infinite loop");
467         }
468     }
469 
470     // sort order of french words with multiple accents has errors
471     //
472     public void Test4132736() {
473         Collator c = Collator.getInstance(Locale.FRANCE);
474 
475         String[] test1 = {
476             "e\u0300e\u0301",   "<",    "e\u0301e\u0300",
477             "e\u0300\u0301",    ">",    "e\u0301\u0300",
478         };
479         compareArray(c, test1);
480     }
481 
482     // The sorting using java.text.CollationKey is not in the exact order
483     //
484     public void Test4133509() {
485         String[] test1 = {
486             "Exception",    "<",    "ExceptionInInitializerError",
487             "Graphics",     "<",    "GraphicsEnvironment",
488             "String",       "<",    "StringBuffer",
489         };
490         compareArray(en_us, test1);
491     }
492 
493     // Collation with decomposition off doesn't work for Europe
494     //
495     public void Test4114077() {
496         // Ensure that we get the same results with decomposition off
497         // as we do with it on....
498 
499         RuleBasedCollator c = (RuleBasedCollator) en_us.clone();
500         c.setStrength(Collator.TERTIARY);
501 
502         String[] test1 = {
503             "\u00C0",        "=", "A\u0300",        // Should be equivalent
504             "p\u00eache",         ">", "p\u00e9ch\u00e9",
505             "\u0204",        "=", "E\u030F",
506             "\u01fa",        "=", "A\u030a\u0301",  // a-ring-acute -> a-ring, acute
507                                                     //   -> a, ring, acute
508             "A\u0300\u0316", "<", "A\u0316\u0300",  // No reordering --> unequal
509         };
510         c.setDecomposition(Collator.NO_DECOMPOSITION);
511         compareArray(c, test1);
512 
513         String[] test2 = {
514             "A\u0300\u0316", "=", "A\u0316\u0300",      // Reordering --> equal
515         };
516         c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
517         compareArray(c, test2);
518     }
519 
520     // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
521     //
522     public void Test4141640() {
523         //
524         // Rather than just creating a Swedish collator, we might as well
525         // try to instantiate one for every locale available on the system
526         // in order to prevent this sort of bug from cropping up in the future
527         //
528         Locale[] locales = Collator.getAvailableLocales();
529 
530         for (int i = 0; i < locales.length; i++) {
531             try {
532                 Collator c = Collator.getInstance(locales[i]);
533             } catch (Exception e) {
534                 errln("Caught " + e + " creating collator for " + locales[i]);
535             }
536         }
537     }
538 
539     // getCollationKey throws exception for spanish text
540     // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
541     //
542     public void Test4139572() {
543         //
544         // Code pasted straight from the bug report
545         //
546         // create spanish locale and collator
547         Locale l = Locale.of("es", "es");
548         Collator col = Collator.getInstance(l);
549 
550         // this spanish phrase kills it!
551         col.getCollationKey("Nombre De Objeto");
552     }
553 
554     // RuleBasedCollator doesn't use getCollationElementIterator internally
555     //
556     public void Test4146160() throws ParseException {
557         //
558         // Use a custom collator class whose getCollationElementIterator
559         // methods increment a count....
560         //
561         My4146160Collator.count = 0;
562         new My4146160Collator().getCollationKey("1");
563         if (My4146160Collator.count < 1) {
564             errln("getCollationElementIterator not called");
565         }
566 
567         My4146160Collator.count = 0;
568         new My4146160Collator().compare("1", "2");
569         if (My4146160Collator.count < 1) {
570             errln("getCollationElementIterator not called");
571         }
572     }
573 
574     static class My4146160Collator extends RuleBasedCollator {
575         public My4146160Collator() throws ParseException {
576             super(Regression.en_us.getRules());
577         }
578 
579         public CollationElementIterator getCollationElementIterator(
580                                             String text) {
581             count++;
582             return super.getCollationElementIterator(text);
583         }
584         public CollationElementIterator getCollationElementIterator(
585                                             CharacterIterator text) {
586             count++;
587             return super.getCollationElementIterator(text);
588         }
589 
590         public static int count = 0;
591     };
592 
593     // CollationElementIterator.previous broken for expanding char sequences
594     //
595     public void Test4179686() throws ParseException {
596 
597         // Create a collator with a few expanding character sequences in it....
598         RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules()
599                                                     + " & ae ; \u00e4 & AE ; \u00c4"
600                                                     + " & oe ; \u00f6 & OE ; \u00d6"
601                                                     + " & ue ; \u00fc & UE ; \u00dc");
602 
603         String text = "T\u00f6ne"; // o-umlaut
604 
605         CollationElementIterator iter = coll.getCollationElementIterator(text);
606         Vector elements = new Vector();
607         int elem;
608 
609         // Iterate forward and collect all of the elements into a Vector
610         while ((elem = iter.next()) != iter.NULLORDER) {
611             elements.addElement(new Integer(elem));
612         }
613 
614         // Now iterate backward and make sure they're the same
615         int index = elements.size() - 1;
616         while ((elem = iter.previous()) != iter.NULLORDER) {
617             int expect = ((Integer)elements.elementAt(index)).intValue();
618 
619             if (elem != expect) {
620                 errln("Mismatch at index " + index
621                       + ": got " + Integer.toString(elem,16)
622                       + ", expected " + Integer.toString(expect,16));
623             }
624             index--;
625         }
626     }
627 
628     public void Test4244884() throws ParseException {
629         RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
630         coll = new RuleBasedCollator(coll.getRules()
631                 + " & C < ch , cH , Ch , CH < cat < crunchy");
632 
633         String[] testStrings = new String[] {
634             "car",
635             "cave",
636             "clamp",
637             "cramp",
638             "czar",
639             "church",
640             "catalogue",
641             "crunchy",
642             "dog"
643         };
644 
645         for (int i = 1; i < testStrings.length; i++) {
646             if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) {
647                 errln("error: \"" + testStrings[i - 1]
648                     + "\" is greater than or equal to \"" + testStrings[i]
649                     + "\".");
650             }
651         }
652     }
653 
654     public void Test4179216() throws ParseException {
655         // you can position a CollationElementIterator in the middle of
656         // a contracting character sequence, yielding a bogus collation
657         // element
658         RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(Locale.US);
659         coll = new RuleBasedCollator(coll.getRules()
660                 + " & C < ch , cH , Ch , CH < cat < crunchy");
661         String testText = "church church catcatcher runcrunchynchy";
662         CollationElementIterator iter = coll.getCollationElementIterator(
663                 testText);
664 
665         // test that the "ch" combination works properly
666         iter.setOffset(4);
667         int elt4 = CollationElementIterator.primaryOrder(iter.next());
668 
669         iter.reset();
670         int elt0 = CollationElementIterator.primaryOrder(iter.next());
671 
672         iter.setOffset(5);
673         int elt5 = CollationElementIterator.primaryOrder(iter.next());
674 
675         if (elt4 != elt0 || elt5 != elt0)
676             errln("The collation elements at positions 0 (" + elt0 + "), 4 ("
677                     + elt4 + "), and 5 (" + elt5 + ") don't match.");
678 
679         // test that the "cat" combination works properly
680         iter.setOffset(14);
681         int elt14 = CollationElementIterator.primaryOrder(iter.next());
682 
683         iter.setOffset(15);
684         int elt15 = CollationElementIterator.primaryOrder(iter.next());
685 
686         iter.setOffset(16);
687         int elt16 = CollationElementIterator.primaryOrder(iter.next());
688 
689         iter.setOffset(17);
690         int elt17 = CollationElementIterator.primaryOrder(iter.next());
691 
692         iter.setOffset(18);
693         int elt18 = CollationElementIterator.primaryOrder(iter.next());
694 
695         iter.setOffset(19);
696         int elt19 = CollationElementIterator.primaryOrder(iter.next());
697 
698         if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
699                 || elt14 != elt18 || elt14 != elt19)
700             errln("\"cat\" elements don't match: elt14 = " + elt14 + ", elt15 = "
701             + elt15 + ", elt16 = " + elt16 + ", elt17 = " + elt17
702             + ", elt18 = " + elt18 + ", elt19 = " + elt19);
703 
704         // now generate a complete list of the collation elements,
705         // first using next() and then using setOffset(), and
706         // make sure both interfaces return the same set of elements
707         iter.reset();
708 
709         int elt = iter.next();
710         int count = 0;
711         while (elt != CollationElementIterator.NULLORDER) {
712             ++count;
713             elt = iter.next();
714         }
715 
716         String[] nextElements = new String[count];
717         String[] setOffsetElements = new String[count];
718         int lastPos = 0;
719 
720         iter.reset();
721         elt = iter.next();
722         count = 0;
723         while (elt != CollationElementIterator.NULLORDER) {
724             nextElements[count++] = testText.substring(lastPos, iter.getOffset());
725             lastPos = iter.getOffset();
726             elt = iter.next();
727         }
728         count = 0;
729         for (int i = 0; i < testText.length(); ) {
730             iter.setOffset(i);
731             lastPos = iter.getOffset();
732             elt = iter.next();
733             setOffsetElements[count++] = testText.substring(lastPos, iter.getOffset());
734             i = iter.getOffset();
735         }
736         for (int i = 0; i < nextElements.length; i++) {
737             if (nextElements[i].equals(setOffsetElements[i])) {
738                 logln(nextElements[i]);
739             } else {
740                 errln("Error: next() yielded " + nextElements[i] + ", but setOffset() yielded "
741                     + setOffsetElements[i]);
742             }
743         }
744     }
745 
746     public void Test4216006() throws Exception {
747         // rule parser barfs on "<\u00e0=a\u0300", and on other cases
748         // where the same token (after normalization) appears twice in a row
749         boolean caughtException = false;
750         try {
751             RuleBasedCollator dummy = new RuleBasedCollator("\u00e0<a\u0300");
752         }
753         catch (ParseException e) {
754             caughtException = true;
755         }
756         if (!caughtException) {
757             throw new Exception("\"a<a\" collation sequence didn't cause parse error!");
758         }
759 
760         RuleBasedCollator collator = new RuleBasedCollator("<\u00e0=a\u0300");
761         collator.setDecomposition(Collator.FULL_DECOMPOSITION);
762         collator.setStrength(Collator.IDENTICAL);
763 
764         String[] tests = {
765             "a\u0300", "=", "\u00e0",
766             "\u00e0",  "=", "a\u0300"
767         };
768 
769         compareArray(collator, tests);
770     }
771 
772     public void Test4171974() {
773         // test French accent ordering more thoroughly
774         String[] frenchList = {
775             "\u0075\u0075",     // u u
776             "\u00fc\u0075",     // u-umlaut u
777             "\u01d6\u0075",     // u-umlaut-macron u
778             "\u016b\u0075",     // u-macron u
779             "\u1e7b\u0075",     // u-macron-umlaut u
780             "\u0075\u00fc",     // u u-umlaut
781             "\u00fc\u00fc",     // u-umlaut u-umlaut
782             "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
783             "\u016b\u00fc",     // u-macron u-umlaut
784             "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
785             "\u0075\u01d6",     // u u-umlaut-macron
786             "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
787             "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
788             "\u016b\u01d6",     // u-macron u-umlaut-macron
789             "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
790             "\u0075\u016b",     // u u-macron
791             "\u00fc\u016b",     // u-umlaut u-macron
792             "\u01d6\u016b",     // u-umlaut-macron u-macron
793             "\u016b\u016b",     // u-macron u-macron
794             "\u1e7b\u016b",     // u-macron-umlaut u-macron
795             "\u0075\u1e7b",     // u u-macron-umlaut
796             "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
797             "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
798             "\u016b\u1e7b",     // u-macron u-macron-umlaut
799             "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
800         };
801         Collator french = Collator.getInstance(Locale.FRENCH);
802 
803         logln("Testing French order...");
804         checkListOrder(frenchList, french);
805 
806         logln("Testing French order without decomposition...");
807         french.setDecomposition(Collator.NO_DECOMPOSITION);
808         checkListOrder(frenchList, french);
809 
810         String[] englishList = {
811             "\u0075\u0075",     // u u
812             "\u0075\u00fc",     // u u-umlaut
813             "\u0075\u01d6",     // u u-umlaut-macron
814             "\u0075\u016b",     // u u-macron
815             "\u0075\u1e7b",     // u u-macron-umlaut
816             "\u00fc\u0075",     // u-umlaut u
817             "\u00fc\u00fc",     // u-umlaut u-umlaut
818             "\u00fc\u01d6",     // u-umlaut u-umlaut-macron
819             "\u00fc\u016b",     // u-umlaut u-macron
820             "\u00fc\u1e7b",     // u-umlaut u-macron-umlaut
821             "\u01d6\u0075",     // u-umlaut-macron u
822             "\u01d6\u00fc",     // u-umlaut-macron u-umlaut
823             "\u01d6\u01d6",     // u-umlaut-macron u-umlaut-macron
824             "\u01d6\u016b",     // u-umlaut-macron u-macron
825             "\u01d6\u1e7b",     // u-umlaut-macron u-macron-umlaut
826             "\u016b\u0075",     // u-macron u
827             "\u016b\u00fc",     // u-macron u-umlaut
828             "\u016b\u01d6",     // u-macron u-umlaut-macron
829             "\u016b\u016b",     // u-macron u-macron
830             "\u016b\u1e7b",     // u-macron u-macron-umlaut
831             "\u1e7b\u0075",     // u-macron-umlaut u
832             "\u1e7b\u00fc",     // u-macron-umlaut u-umlaut
833             "\u1e7b\u01d6",     // u-macron-umlaut u-umlaut-macron
834             "\u1e7b\u016b",     // u-macron-umlaut u-macron
835             "\u1e7b\u1e7b"      // u-macron-umlaut u-macron-umlaut
836         };
837         Collator english = Collator.getInstance(Locale.ENGLISH);
838 
839         logln("Testing English order...");
840         checkListOrder(englishList, english);
841 
842         logln("Testing English order without decomposition...");
843         english.setDecomposition(Collator.NO_DECOMPOSITION);
844         checkListOrder(englishList, english);
845     }
846 
847     private void checkListOrder(String[] sortedList, Collator c) {
848         // this function uses the specified Collator to make sure the
849         // passed-in list is already sorted into ascending order
850         for (int i = 0; i < sortedList.length - 1; i++) {
851             if (c.compare(sortedList[i], sortedList[i + 1]) >= 0) {
852                 errln("List out of order at element #" + i + ": "
853                         + prettify(sortedList[i]) + " >= "
854                         + prettify(sortedList[i + 1]));
855             }
856         }
857     }
858 
859     // CollationElementIterator set doesn't work propertly with next/prev
860     public void Test4663220() {
861         RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance(Locale.US);
862         CharacterIterator stringIter = new StringCharacterIterator("fox");
863         CollationElementIterator iter = collator.getCollationElementIterator(stringIter);
864 
865         int[] elements_next = new int[3];
866         logln("calling next:");
867         for (int i = 0; i < 3; ++i) {
868             logln("[" + i + "] " + (elements_next[i] = iter.next()));
869         }
870 
871         int[] elements_fwd = new int[3];
872         logln("calling set/next:");
873         for (int i = 0; i < 3; ++i) {
874             iter.setOffset(i);
875             logln("[" + i + "] " + (elements_fwd[i] = iter.next()));
876         }
877 
878         for (int i = 0; i < 3; ++i) {
879             if (elements_next[i] != elements_fwd[i]) {
880                 errln("mismatch at position " + i +
881                       ": " + elements_next[i] +
882                       " != " + elements_fwd[i]);
883             }
884         }
885     }
886 
887     //------------------------------------------------------------------------
888     // Internal utilities
889     //
890     private void compareArray(Collator c, String[] tests) {
891         for (int i = 0; i < tests.length; i += 3) {
892 
893             int expect = 0;
894             if (tests[i+1].equals("<")) {
895                 expect = -1;
896             } else if (tests[i+1].equals(">")) {
897                 expect = 1;
898             } else if (tests[i+1].equals("=")) {
899                 expect = 0;
900             } else {
901                 expect = Integer.decode(tests[i+1]).intValue();
902             }
903 
904             int result = c.compare(tests[i], tests[i+2]);
905             if (sign(result) != sign(expect))
906             {
907                 errln( i/3 + ": compare(" + prettify(tests[i])
908                                     + " , " + prettify(tests[i+2])
909                                     + ") got " + result + "; expected " + expect);
910             }
911             else
912             {
913                 // Collator.compare worked OK; now try the collation keys
914                 CollationKey k1 = c.getCollationKey(tests[i]);
915                 CollationKey k2 = c.getCollationKey(tests[i+2]);
916 
917                 result = k1.compareTo(k2);
918                 if (sign(result) != sign(expect)) {
919                     errln( i/3 + ": key(" + prettify(tests[i])
920                                         + ").compareTo(key(" + prettify(tests[i+2])
921                                         + ")) got " + result + "; expected " + expect);
922 
923                     errln("  " + prettify(k1) + " vs. " + prettify(k2));
924                 }
925             }
926         }
927     }
928 
929     private static final int sign(int i) {
930         if (i < 0) return -1;
931         if (i > 0) return 1;
932         return 0;
933     }
934 
935 
    // Collator for Locale.US, cast to RuleBasedCollator and held in a static
    // field so it is created once for the class.
    // NOTE(review): the code that uses it is outside this part of the file.
    static RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);

    // Sample strings stored per instance. test1 and test2 are long sentences
    // that differ in their leading word ("XFILE" vs. "Xf ile") and in a few
    // later words; test3 contains accented Latin letters and a sharp s.
    // NOTE(review): nothing in this part of the file reads them -- confirm
    // they are still used before relying on or removing them.
    String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
    String test2 = "Xf ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
    String test3 = "a\u00FCbeck Gr\u00F6\u00DFe L\u00FCbeck";
941 }