1 /*
  2  * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  */
 23 
 24 /*
 25  * @test
 26  * @library /java/text/testlib
 27  * @summary test Dummy Collation
 28  * @run junit DummyTest
 29  */
 30 
 31 import java.text.Collator;
 32 import java.text.RuleBasedCollator;
 33 
 34 import org.junit.jupiter.api.Test;
 35 
 36 import static org.junit.jupiter.api.Assertions.fail;
 37 
 38 /*
 39 (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
 40 (C) Copyright IBM Corp. 1996 - All Rights Reserved
 41 
 42   The original version of this source code and documentation is copyrighted and
 43 owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These materials are
 44 provided under terms of a License Agreement between Taligent and Sun. This
 45 technology is protected by multiple US and International patents. This notice and
 46 attribution to Taligent may not be removed.
 47   Taligent is a registered trademark of Taligent, Inc.
 48 */
 49 
 50 public class DummyTest {
 51 
 52     private static final String DEFAULTRULES =
 53         "='\u200B'=\u200C=\u200D=\u200E=\u200F"
 54         // Control Characters
 55         + "=\u0000 =\u0001 =\u0002 =\u0003 =\u0004" //null, .. eot
 56         + "=\u0005 =\u0006 =\u0007 =\u0008 ='\u0009'" //enq, ...
 57         + "='\u000b' =\u000e" //vt,, so
 58         + "=\u000f ='\u0010' =\u0011 =\u0012 =\u0013" //si, dle, dc1, dc2, dc3
 59         + "=\u0014 =\u0015 =\u0016 =\u0017 =\u0018" //dc4, nak, syn, etb, can
 60         + "=\u0019 =\u001a =\u001b =\u001c =\u001d" //em, sub, esc, fs, gs
 61         + "=\u001e =\u001f =\u007f"                   //rs, us, del
 62         //....then the C1 Latin 1 reserved control codes
 63         + "=\u0080 =\u0081 =\u0082 =\u0083 =\u0084 =\u0085"
 64         + "=\u0086 =\u0087 =\u0088 =\u0089 =\u008a =\u008b"
 65         + "=\u008c =\u008d =\u008e =\u008f =\u0090 =\u0091"
 66         + "=\u0092 =\u0093 =\u0094 =\u0095 =\u0096 =\u0097"
 67         + "=\u0098 =\u0099 =\u009a =\u009b =\u009c =\u009d"
 68         + "=\u009e =\u009f"
 69         // IGNORE except for secondary, tertiary difference
 70         // Spaces
 71         + ";'\u0020';'\u00A0'"                  // spaces
 72         + ";'\u2000';'\u2001';'\u2002';'\u2003';'\u2004'"  // spaces
 73         + ";'\u2005';'\u2006';'\u2007';'\u2008';'\u2009'"  // spaces
 74         + ";'\u200A';'\u3000';'\uFEFF'"                // spaces
 75         + ";'\r' ;'\t' ;'\n';'\f';'\u000b'"  // whitespace
 76 
 77         // Non-spacing accents
 78 
 79         + ";\u0301"          // non-spacing acute accent
 80         + ";\u0300"          // non-spacing grave accent
 81         + ";\u0306"          // non-spacing breve accent
 82         + ";\u0302"          // non-spacing circumflex accent
 83         + ";\u030c"          // non-spacing caron/hacek accent
 84         + ";\u030a"          // non-spacing ring above accent
 85         + ";\u030d"          // non-spacing vertical line above
 86         + ";\u0308"          // non-spacing diaeresis accent
 87         + ";\u030b"          // non-spacing double acute accent
 88         + ";\u0303"          // non-spacing tilde accent
 89         + ";\u0307"          // non-spacing dot above/overdot accent
 90         + ";\u0304"          // non-spacing macron accent
 91         + ";\u0337"          // non-spacing short slash overlay (overstruck diacritic)
 92         + ";\u0327"          // non-spacing cedilla accent
 93         + ";\u0328"          // non-spacing ogonek accent
 94         + ";\u0323"          // non-spacing dot-below/underdot accent
 95         + ";\u0332"          // non-spacing underscore/underline accent
 96         // with the rest of the general diacritical marks in binary order
 97         + ";\u0305"          // non-spacing overscore/overline
 98         + ";\u0309"          // non-spacing hook above
 99         + ";\u030e"          // non-spacing double vertical line above
100         + ";\u030f"          // non-spacing double grave
101         + ";\u0310"          // non-spacing chandrabindu
102         + ";\u0311"          // non-spacing inverted breve
103         + ";\u0312"          // non-spacing turned comma above/cedilla above
104         + ";\u0313"          // non-spacing comma above
105         + ";\u0314"          // non-spacing reversed comma above
106         + ";\u0315"          // non-spacing comma above right
107         + ";\u0316"          // non-spacing grave below
108         + ";\u0317"          // non-spacing acute below
109         + ";\u0318"          // non-spacing left tack below
110         + ";\u0319"          // non-spacing tack below
111         + ";\u031a"          // non-spacing left angle above
112         + ";\u031b"          // non-spacing horn
113         + ";\u031c"          // non-spacing left half ring below
114         + ";\u031d"          // non-spacing up tack below
115         + ";\u031e"          // non-spacing down tack below
116         + ";\u031f"          // non-spacing plus sign below
117         + ";\u0320"          // non-spacing minus sign below
118         + ";\u0321"          // non-spacing palatalized hook below
119         + ";\u0322"          // non-spacing retroflex hook below
120         + ";\u0324"          // non-spacing double dot below
121         + ";\u0325"          // non-spacing ring below
122         + ";\u0326"          // non-spacing comma below
123         + ";\u0329"          // non-spacing vertical line below
124         + ";\u032a"          // non-spacing bridge below
125         + ";\u032b"          // non-spacing inverted double arch below
126         + ";\u032c"          // non-spacing hacek below
127         + ";\u032d"          // non-spacing circumflex below
128         + ";\u032e"          // non-spacing breve below
129         + ";\u032f"          // non-spacing inverted breve below
130         + ";\u0330"          // non-spacing tilde below
131         + ";\u0331"          // non-spacing macron below
132         + ";\u0333"          // non-spacing double underscore
133         + ";\u0334"          // non-spacing tilde overlay
134         + ";\u0335"          // non-spacing short bar overlay
135         + ";\u0336"          // non-spacing long bar overlay
136         + ";\u0338"          // non-spacing long slash overlay
137         + ";\u0339"          // non-spacing right half ring below
138         + ";\u033a"          // non-spacing inverted bridge below
139         + ";\u033b"          // non-spacing square below
140         + ";\u033c"          // non-spacing seagull below
141         + ";\u033d"          // non-spacing x above
142         + ";\u033e"          // non-spacing vertical tilde
143         + ";\u033f"          // non-spacing double overscore
144         + ";\u0340"          // non-spacing grave tone mark
145         + ";\u0341"          // non-spacing acute tone mark
146         + ";\u0342;\u0343;\u0344;\u0345;\u0360;\u0361"    // newer
147         + ";\u0483;\u0484;\u0485;\u0486"    // Cyrillic accents
148 
149         + ";\u20D0;\u20D1;\u20D2"           // symbol accents
150         + ";\u20D3;\u20D4;\u20D5"           // symbol accents
151         + ";\u20D6;\u20D7;\u20D8"           // symbol accents
152         + ";\u20D9;\u20DA;\u20DB"           // symbol accents
153         + ";\u20DC;\u20DD;\u20DE"           // symbol accents
154         + ";\u20DF;\u20E0;\u20E1"           // symbol accents
155 
156         + ",'\u002D';\u00AD"                // dashes
157         + ";\u2010;\u2011;\u2012"           // dashes
158         + ";\u2013;\u2014;\u2015"           // dashes
159         + ";\u2212"                         // dashes
160 
161         // other punctuation
162 
163         + "<'\u005f'"        // underline/underscore (spacing)
164         + "<\u00af"          // overline or macron (spacing)
165 //        + "<\u00ad"        // syllable hyphen (SHY) or soft hyphen
166         + "<'\u002c'"        // comma (spacing)
167         + "<'\u003b'"        // semicolon
168         + "<'\u003a'"        // colon
169         + "<'\u0021'"        // exclamation point
170         + "<\u00a1"          // inverted exclamation point
171         + "<'\u003f'"        // question mark
172         + "<\u00bf"          // inverted question mark
173         + "<'\u002f'"        // slash
174         + "<'\u002e'"        // period/full stop
175         + "<\u00b4"          // acute accent (spacing)
176         + "<'\u0060'"        // grave accent (spacing)
177         + "<'\u005e'"        // circumflex accent (spacing)
178         + "<\u00a8"          // diaresis/umlaut accent (spacing)
179         + "<'\u007e'"        // tilde accent (spacing)
180         + "<\u00b7"          // middle dot (spacing)
181         + "<\u00b8"          // cedilla accent (spacing)
182         + "<'\u0027'"        // apostrophe
183         + "<'\"'"            // quotation marks
184         + "<\u00ab"          // left angle quotes
185         + "<\u00bb"          // right angle quotes
186         + "<'\u0028'"        // left parenthesis
187         + "<'\u0029'"        // right parenthesis
188         + "<'\u005b'"        // left bracket
189         + "<'\u005d'"        // right bracket
190         + "<'\u007b'"        // left brace
191         + "<'\u007d'"        // right brace
192         + "<\u00a7"          // section symbol
193         + "<\u00b6"          // paragraph symbol
194         + "<\u00a9"          // copyright symbol
195         + "<\u00ae"          // registered trademark symbol
196         + "<'\u0040'"          // at sign
197         + "<\u00a4"          // international currency symbol
198         + "<\u00a2"          // cent sign
199         + "<'\u0024'"        // dollar sign
200         + "<\u00a3"          // pound-sterling sign
201         + "<\u00a5"          // yen sign
202         + "<'\u002a'"        // asterisk
203         + "<'\\u005c'"       // backslash
204         + "<'\u0026'"        // ampersand
205         + "<'\u0023'"        // number sign
206         + "<'\u0025'"        // percent sign
207         + "<'\u002b'"        // plus sign
208 //        + "<\u002d"        // hyphen or minus sign
209         + "<\u00b1"          // plus-or-minus sign
210         + "<\u00f7"          // divide sign
211         + "<\u00d7"          // multiply sign
212         + "<'\u003c'"        // less-than sign
213         + "<'\u003d'"        // equal sign
214         + "<'\u003e'"        // greater-than sign
215         + "<\u00ac"          // end of line symbol/logical NOT symbol
216         + "<'\u007c'"          // vertical line/logical OR symbol
217         + "<\u00a6"          // broken vertical line
218         + "<\u00b0"          // degree symbol
219         + "<\u00b5"          // micro symbol
220 
221         // NUMERICS
222 
223         + "<0<1<2<3<4<5<6<7<8<9"
224         + "<\u00bc<\u00bd<\u00be"   // 1/4,1/2,3/4 fractions
225 
226         // NON-IGNORABLES
227         + "<a,A"
228         + "<b,B"
229         + "<c,C"
230         + "<d,D"
231         + "<\u00F0,\u00D0"                  // eth
232         + "<e,E"
233         + "<f,F"
234         + "<g,G"
235         + "<h,H"
236         + "<i,I"
237         + "<j,J"
238         + "<k,K"
239         + "<l,L"
240         + "<m,M"
241         + "<n,N"
242         + "<o,O"
243         + "<p,P"
244         + "<q,Q"
245         + "<r,R"
246         + "<s, S & SS,\u00DF"             // s-zet
247         + "<t,T"
248         + "&th, \u00FE & TH, \u00DE"           // thorn
249         + "<u,U"
250         + "<v,V"
251         + "<w,W"
252         + "<x,X"
253         + "<y,Y"
254         + "<z,Z"
255         + "&AE,\u00C6"                    // ae & AE ligature
256         + "&AE,\u00E6"
257         + "&OE,\u0152"                    // oe & OE ligature
258         + "&OE,\u0153";
259 
260     /*
261      * Data for TestPrimary()
262      */
263     private static final String[] primarySourceData = {
264         "p\u00EAche",
265         "abc",
266         "abc",
267         "abc",
268         "abc",
269         "abc",
270         "a\u00E6c",
271         "acHc",
272         "black"
273     };
274 
275     private static final String[] primaryTargetData = {
276         "p\u00E9ch\u00E9",
277         "abc",
278         "aBC",
279         "abch",
280         "abd",
281         "\u00E4bc",
282         "a\u00C6c",
283         "aCHc",
284         "black-bird"
285     };
286 
287     private static final int[] primaryResults = {
288          0,  0,  0, -1, -1,  0,  0,  0, -1
289     };
290 
291     /*
292      * Data for TestSecondary()
293      */
294     private static final String[] secondarySourceData = {
295         "four",
296         "five",
297         "1",
298         "abc",
299         "abc",
300         "abcH",
301         "abc",
302         "acHc"
303     };
304 
305     private static final String[] secondaryTargetData = {
306 
307         "4",
308         "5",
309         "one",
310         "abc",
311         "aBc",
312         "abch",
313         "abd",
314         "aCHc"
315     };
316 
317     private static final int[] secondaryResults = {
318          0,  0,  0,  0,  0,  0, -1,  0
319     };
320 
321     /*
322      * Data for TestTertiary()
323      */
324     private static final String[] tertiarySourceData = {
325         "ab'c",
326         "co-op",
327         "ab",
328         "ampersad",
329         "all",
330         "four",
331         "five",
332         "1",
333         "1",
334         "1",
335         "2",
336         "2",
337         "Hello",
338         "a<b",
339         "a<b",
340         "acc",
341         "acHc"
342     };
343 
344     private static final String[] tertiaryTargetData = {
345         "abc",
346         "COOP",
347         "abc",
348         "&",
349         "&",
350         "4",
351         "5",
352         "one",
353         "nne",
354         "pne",
355         "two",
356         "uwo",
357         "hellO",
358         "a<=b",
359         "abc",
360         "aCHc",
361         "aCHc"
362     };
363 
364     private static final int[] tertiaryResults = {
365         -1,  1, -1, -1, -1, -1, -1,  1,  1, -1,
366          1, -1,  1,  1, -1, -1, -1
367     };
368 
369 
370     private static final String[] testData = {
371         "a",
372         "A",
373         "\u00e4",
374         "\u00c4",
375         "ae",
376         "aE",
377         "Ae",
378         "AE",
379         "\u00e6",
380         "\u00c6",
381         "b",
382         "c",
383         "z"
384     };
385 
386     @Test
387     public void TestPrimary() {
388         TestUtils.doCollatorTest(getCollator(), Collator.PRIMARY,
389                primarySourceData, primaryTargetData, primaryResults);
390     }
391 
392     @Test
393     public void TestSecondary() {
394         TestUtils.doCollatorTest(getCollator(), Collator.SECONDARY,
395                secondarySourceData, secondaryTargetData, secondaryResults);
396     }
397 
398     @Test
399     public void TestTertiary() {
400         Collator col = getCollator();
401 
402         TestUtils.doCollatorTest(col, Collator.TERTIARY,
403                tertiarySourceData, tertiaryTargetData, tertiaryResults);
404 
405         for (int i = 0; i < testData.length-1; i++) {
406             for (int j = i+1; j < testData.length; j++) {
407                 TestUtils.doCollatorTest(col, testData[i], testData[j], -1);
408             }
409         }
410     }
411 
412     private RuleBasedCollator myCollation = null;
413     private Collator getCollator() {
414         if (myCollation == null) {
415             try {
416                 myCollation = new RuleBasedCollator
417                     (DEFAULTRULES + "& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ");
418             } catch (Exception foo) {
419                 fail("Collator creation failed.");
420                 myCollation = (RuleBasedCollator)Collator.getInstance();
421             }
422         }
423         return myCollation;
424     }
425 }