1 /*
2 * Copyright (c) 2002, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package java.lang;
27
28 import jdk.internal.misc.CDS;
29 import jdk.internal.misc.PreviewFeatures;
30 import jdk.internal.value.DeserializeConstructor;
31 import jdk.internal.vm.annotation.AOTSafeClassInitializer;
32 import jdk.internal.vm.annotation.IntrinsicCandidate;
33 import jdk.internal.vm.annotation.Stable;
34
35 import java.lang.constant.Constable;
36 import java.lang.constant.DynamicConstantDesc;
37 import java.util.Arrays;
38 import java.util.HashMap;
39 import java.util.Locale;
40 import java.util.Map;
41 import java.util.Objects;
42 import java.util.Optional;
43
44 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST;
45 import static java.lang.constant.ConstantDescs.CD_char;
46 import static java.lang.constant.ConstantDescs.DEFAULT_NAME;
47
48 /**
49 * The {@code Character} class is the {@linkplain
50 * java.lang##wrapperClass wrapper class} for values of the primitive
51 * type {@code char}. An object of type {@code Character} contains a
52 * single field whose type is {@code char}.
53 *
54 * <p>In addition, this class provides a large number of static methods for
55 * determining a character's category (lowercase letter, digit, etc.)
56 * and for converting characters from uppercase to lowercase and vice
57 * versa.
58 *
59 * <h2><a id="conformance">Unicode Conformance</a></h2>
60 * <p>
61 * The fields and methods of class {@code Character} are defined in terms
62 * of character information from the Unicode Standard, specifically the
63 * <i>UnicodeData</i> file that is part of the Unicode Character Database.
64 * This file specifies properties including name and category for every
65 * assigned Unicode code point or character range. The file is available
66 * from the Unicode Consortium at
67 * <a href="http://www.unicode.org">http://www.unicode.org</a>.
68 * <p>
69 * Character information is based on the Unicode Standard, version 17.0.
70 * <p>
71 * The Java platform has supported different versions of the Unicode
72 * Standard over time. The following tables list the version of Unicode used
73 * in each Java release. Unless otherwise specified, all update releases in a
74 * given Java release family use the same Unicode version.
75 * <table class="striped">
76 * <!-- The expanded table should include the current Java release, followed
77 * by commonly used releases, with other releases listed in the details
78 * section -->
79 * <caption style="display:none">Shows Java releases and supported Unicode versions</caption>
80 * <thead>
81 * <tr><th scope="col">Java release</th>
82 * <th scope="col">Unicode version</th></tr>
83 * </thead>
84 * <tbody>
85 * <tr><th scope="row" style="text-align:left">Java SE 26</th>
86 * <td>Unicode 17.0</td></tr>
87 * <tr><th scope="row" style="text-align:left">Java SE 25</th>
88 * <td>Unicode 16.0</td></tr>
89 * <tr><th scope="row" style="text-align:left">Java SE 21</th>
90 * <td>Unicode 15.0</td></tr>
91 * <tr><th scope="row" style="text-align:left">Java SE 17</th>
92 * <td>Unicode 13.0</td></tr>
93 * <tr><th scope="row" style="text-align:left">Java SE 11</th>
94 * <td>Unicode 10.0</td></tr>
95 * <tr><th scope="row" style="text-align:left">Java SE 8</th>
96 * <td>Unicode 6.2</td></tr>
97 * </tbody>
98 * </table>
99 * <details>
100 * <summary>Show other Java releases</summary>
101 * <p>Java releases prior to Java SE 8 are listed only if they upgraded the
102 * Unicode version</p>
103 * <table class="striped">
104 * <caption style="display:none">Shows other Java releases and supported Unicode
105 * versions</caption>
106 * <thead>
107 * <tr><th scope="col">Java release</th>
108 * <th scope="col">Unicode version</th></tr>
109 * </thead>
110 * <tbody>
111 * <tr><th scope="row" style="text-align:left">Java SE 24</th>
112 * <td>Unicode 16.0</td></tr>
113 * <tr><th scope="row" style="text-align:left">Java SE 23</th>
114 * <td>Unicode 15.1</td></tr>
115 * <tr><th scope="row" style="text-align:left">Java SE 22</th>
116 * <td>Unicode 15.1</td></tr>
117 * <tr><th scope="row" style="text-align:left">Java SE 20</th>
118 * <td>Unicode 15.0</td></tr>
119 * <tr><th scope="row" style="text-align:left">Java SE 19</th>
120 * <td>Unicode 14.0</td></tr>
121 * <tr><th scope="row" style="text-align:left">Java SE 18</th>
122 * <td>Unicode 13.0</td></tr>
123 * <tr><th scope="row" style="text-align:left">Java SE 16</th>
124 * <td>Unicode 13.0</td></tr>
125 * <tr><th scope="row" style="text-align:left">Java SE 15</th>
126 * <td>Unicode 13.0</td></tr>
127 * <tr><th scope="row" style="text-align:left">Java SE 14</th>
128 * <td>Unicode 12.1</td></tr>
129 * <tr><th scope="row" style="text-align:left">Java SE 13</th>
130 * <td>Unicode 12.1</td></tr>
131 * <tr><th scope="row" style="text-align:left">Java SE 12</th>
132 * <td>Unicode 11.0</td></tr>
133 * <tr><th scope="row" style="text-align:left">Java SE 10</th>
134 * <td>Unicode 8.0</td></tr>
135 * <tr><th scope="row" style="text-align:left">Java SE 9</th>
136 * <td>Unicode 8.0</td></tr>
137 * <tr><th scope="row" style="text-align:left">Java SE 7</th>
138 * <td>Unicode 6.0</td></tr>
139 * <tr><th scope="row" style="text-align:left">Java SE 5.0</th>
140 * <td>Unicode 4.0</td></tr>
141 * <tr><th scope="row" style="text-align:left">Java SE 1.4</th>
142 * <td>Unicode 3.0</td></tr>
143 * <tr><th scope="row" style="text-align:left">JDK 1.1</th>
144 * <td>Unicode 2.0</td></tr>
145 * <tr><th scope="row" style="text-align:left">JDK 1.0.2</th>
146 * <td>Unicode 1.1.5</td></tr>
147 * </tbody>
148 * </table>
149 * </details>
150 * <p>
151 * Variations from these base Unicode versions, such as recognized appendixes,
152 * are documented elsewhere.
153 * <h2><a id="unicode">Unicode Character Representations</a></h2>
154 *
155 * <p>The {@code char} data type (and therefore the value that a
156 * {@code Character} object encapsulates) is based on the
157 * original Unicode specification, which defined characters as
158 * fixed-width 16-bit entities. The Unicode Standard has since been
159 * changed to allow for characters whose representation requires more
160 * than 16 bits. The range of legal <em>code point</em>s is now
161 * U+0000 to U+10FFFF, known as
162 * <em><a href="https://www.unicode.org/glossary/#unicode_scalar_value">
163 * Unicode scalar value</a></em>.
164 *
165 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is
166 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
167 * <a id="supplementary">Characters</a> whose code points are greater
168 * than U+FFFF are called <em>supplementary character</em>s. The Java
169 * platform uses the UTF-16 representation in {@code char} arrays and
170 * in the {@code String} and {@code StringBuffer} classes. In
171 * this representation, supplementary characters are represented as a pair
172 * of {@code char} values, the first from the <em>high-surrogates</em>
173 * range, (\u005CuD800-\u005CuDBFF), the second from the
174 * <em>low-surrogates</em> range (\u005CuDC00-\u005CuDFFF).
175 *
176 * <p>A {@code char} value, therefore, represents Basic
177 * Multilingual Plane (BMP) code points, including the surrogate
178 * code points, or code units of the UTF-16 encoding. An
179 * {@code int} value represents all Unicode code points,
180 * including supplementary code points. The lower (least significant)
181 * 21 bits of {@code int} are used to represent Unicode code
182 * points and the upper (most significant) 11 bits must be zero.
183 * Unless otherwise specified, the behavior with respect to
184 * supplementary characters and surrogate {@code char} values is
185 * as follows:
186 *
187 * <ul>
188 * <li>The methods that only accept a {@code char} value cannot support
189 * supplementary characters. They treat {@code char} values from the
190 * surrogate ranges as undefined characters. For example,
191 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
192 * this specific value if followed by any low-surrogate value in a string
193 * would represent a letter.
194 *
195 * <li>The methods that accept an {@code int} value support all
196 * Unicode characters, including supplementary characters. For
197 * example, {@code Character.isLetter(0x2F81A)} returns
198 * {@code true} because the code point value represents a letter
199 * (a CJK ideograph).
200 * </ul>
201 *
202 * <p>In the Java SE API documentation, <em>Unicode code point</em> is
203 * used for character values in the range between U+0000 and U+10FFFF,
204 * and <em>Unicode code unit</em> is used for 16-bit
205 * {@code char} values that are code units of the <em>UTF-16</em>
206 * encoding. For more information on Unicode terminology, refer to the
207 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
208 *
209 * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a>
210 * class; programmers should treat instances that are {@linkplain #equals(Object) equal}
211 * as interchangeable and should not use instances for synchronization, mutexes, or
212 * with {@linkplain java.lang.ref.Reference object references}.
213 *
214 * <div class="preview-block">
215 * <div class="preview-comment">
216 * When preview features are enabled, {@code Character} is a {@linkplain Class#isValue value class}.
217 * Use of value class instances for synchronization, mutexes, or with
218 * {@linkplain java.lang.ref.Reference object references} results in
219 * {@link IdentityException}.
220 * </div>
221 * </div>
222 *
223 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
224 * @author Lee Boynton
225 * @author Guy Steele
226 * @author Akira Tanaka
227 * @author Martin Buchholz
228 * @author Ulf Zibis
229 * @since 1.0
230 */
231 @jdk.internal.MigratedValueClass
232 @jdk.internal.ValueBased
233 public final class Character implements java.io.Serializable, Comparable<Character>, Constable {
234 /**
235 * The minimum radix available for conversion to and from strings.
236 * The constant value of this field is the smallest value permitted
237 * for the radix argument in radix-conversion methods such as the
238 * {@code digit} method, the {@code forDigit} method, and the
239 * {@code toString} method of class {@code Integer}.
240 *
241 * @see Character#digit(char, int)
242 * @see Character#forDigit(int, int)
243 * @see Integer#toString(int, int)
244 * @see Integer#valueOf(String)
245 */
246 public static final int MIN_RADIX = 2;
247
248 /**
249 * The maximum radix available for conversion to and from strings.
250 * The constant value of this field is the largest value permitted
251 * for the radix argument in radix-conversion methods such as the
252 * {@code digit} method, the {@code forDigit} method, and the
253 * {@code toString} method of class {@code Integer}.
254 *
255 * @see Character#digit(char, int)
256 * @see Character#forDigit(int, int)
257 * @see Integer#toString(int, int)
258 * @see Integer#valueOf(String)
259 */
260 public static final int MAX_RADIX = 36;
261
262 /**
263 * The constant value of this field is the smallest value of type
264 * {@code char}, {@code '\u005Cu0000'}.
265 *
266 * @since 1.0.2
267 */
268 public static final char MIN_VALUE = '\u0000';
269
270 /**
271 * The constant value of this field is the largest value of type
272 * {@code char}, {@code '\u005CuFFFF'}.
273 *
274 * @since 1.0.2
275 */
276 public static final char MAX_VALUE = '\uFFFF';
277
278 /**
279 * The {@code Class} instance representing the primitive type
280 * {@code char}.
281 *
282 * @since 1.1
283 */
284 public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
285
286 /*
287 * Normative general types
288 */
289
290 /*
291 * General character types
292 */
293
294 /**
295 * General category "Cn" in the Unicode specification.
296 * @since 1.1
297 */
298 public static final byte UNASSIGNED = 0;
299
300 /**
301 * General category "Lu" in the Unicode specification.
302 * @since 1.1
303 */
304 public static final byte UPPERCASE_LETTER = 1;
305
306 /**
307 * General category "Ll" in the Unicode specification.
308 * @since 1.1
309 */
310 public static final byte LOWERCASE_LETTER = 2;
311
312 /**
313 * General category "Lt" in the Unicode specification.
314 * @since 1.1
315 */
316 public static final byte TITLECASE_LETTER = 3;
317
318 /**
319 * General category "Lm" in the Unicode specification.
320 * @since 1.1
321 */
322 public static final byte MODIFIER_LETTER = 4;
323
324 /**
325 * General category "Lo" in the Unicode specification.
326 * @since 1.1
327 */
328 public static final byte OTHER_LETTER = 5;
329
330 /**
331 * General category "Mn" in the Unicode specification.
332 * @since 1.1
333 */
334 public static final byte NON_SPACING_MARK = 6;
335
336 /**
337 * General category "Me" in the Unicode specification.
338 * @since 1.1
339 */
340 public static final byte ENCLOSING_MARK = 7;
341
342 /**
343 * General category "Mc" in the Unicode specification.
344 * @since 1.1
345 */
346 public static final byte COMBINING_SPACING_MARK = 8;
347
348 /**
349 * General category "Nd" in the Unicode specification.
350 * @since 1.1
351 */
352 public static final byte DECIMAL_DIGIT_NUMBER = 9;
353
354 /**
355 * General category "Nl" in the Unicode specification.
356 * @since 1.1
357 */
358 public static final byte LETTER_NUMBER = 10;
359
360 /**
361 * General category "No" in the Unicode specification.
362 * @since 1.1
363 */
364 public static final byte OTHER_NUMBER = 11;
365
366 /**
367 * General category "Zs" in the Unicode specification.
368 * @since 1.1
369 */
370 public static final byte SPACE_SEPARATOR = 12;
371
372 /**
373 * General category "Zl" in the Unicode specification.
374 * @since 1.1
375 */
376 public static final byte LINE_SEPARATOR = 13;
377
378 /**
379 * General category "Zp" in the Unicode specification.
380 * @since 1.1
381 */
382 public static final byte PARAGRAPH_SEPARATOR = 14;
383
384 /**
385 * General category "Cc" in the Unicode specification.
386 * @since 1.1
387 */
388 public static final byte CONTROL = 15;
389
390 /**
391 * General category "Cf" in the Unicode specification.
392 * @since 1.1
393 */
394 public static final byte FORMAT = 16;
395
396 /**
397 * General category "Co" in the Unicode specification.
398 * @since 1.1
399 */
400 public static final byte PRIVATE_USE = 18;
401
402 /**
403 * General category "Cs" in the Unicode specification.
404 * @since 1.1
405 */
406 public static final byte SURROGATE = 19;
407
408 /**
409 * General category "Pd" in the Unicode specification.
410 * @since 1.1
411 */
412 public static final byte DASH_PUNCTUATION = 20;
413
414 /**
415 * General category "Ps" in the Unicode specification.
416 * @since 1.1
417 */
418 public static final byte START_PUNCTUATION = 21;
419
420 /**
421 * General category "Pe" in the Unicode specification.
422 * @since 1.1
423 */
424 public static final byte END_PUNCTUATION = 22;
425
426 /**
427 * General category "Pc" in the Unicode specification.
428 * @since 1.1
429 */
430 public static final byte CONNECTOR_PUNCTUATION = 23;
431
432 /**
433 * General category "Po" in the Unicode specification.
434 * @since 1.1
435 */
436 public static final byte OTHER_PUNCTUATION = 24;
437
438 /**
439 * General category "Sm" in the Unicode specification.
440 * @since 1.1
441 */
442 public static final byte MATH_SYMBOL = 25;
443
444 /**
445 * General category "Sc" in the Unicode specification.
446 * @since 1.1
447 */
448 public static final byte CURRENCY_SYMBOL = 26;
449
450 /**
451 * General category "Sk" in the Unicode specification.
452 * @since 1.1
453 */
454 public static final byte MODIFIER_SYMBOL = 27;
455
456 /**
457 * General category "So" in the Unicode specification.
458 * @since 1.1
459 */
460 public static final byte OTHER_SYMBOL = 28;
461
462 /**
463 * General category "Pi" in the Unicode specification.
464 * @since 1.4
465 */
466 public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
467
468 /**
469 * General category "Pf" in the Unicode specification.
470 * @since 1.4
471 */
472 public static final byte FINAL_QUOTE_PUNCTUATION = 30;
473
474 /**
475 * Error flag. Use int (code point) to avoid confusion with U+FFFF.
476 */
477 static final int ERROR = 0xFFFFFFFF;
478
479
480 /**
481 * Undefined bidirectional character type. Undefined {@code char}
482 * values have undefined directionality in the Unicode specification.
483 * @since 1.4
484 */
485 public static final byte DIRECTIONALITY_UNDEFINED = -1;
486
487 /**
488 * Strong bidirectional character type "L" in the Unicode specification.
489 * @since 1.4
490 */
491 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
492
493 /**
494 * Strong bidirectional character type "R" in the Unicode specification.
495 * @since 1.4
496 */
497 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
498
499 /**
500 * Strong bidirectional character type "AL" in the Unicode specification.
501 * @since 1.4
502 */
503 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
504
505 /**
506 * Weak bidirectional character type "EN" in the Unicode specification.
507 * @since 1.4
508 */
509 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
510
511 /**
512 * Weak bidirectional character type "ES" in the Unicode specification.
513 * @since 1.4
514 */
515 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
516
517 /**
518 * Weak bidirectional character type "ET" in the Unicode specification.
519 * @since 1.4
520 */
521 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
522
523 /**
524 * Weak bidirectional character type "AN" in the Unicode specification.
525 * @since 1.4
526 */
527 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
528
529 /**
530 * Weak bidirectional character type "CS" in the Unicode specification.
531 * @since 1.4
532 */
533 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
534
535 /**
536 * Weak bidirectional character type "NSM" in the Unicode specification.
537 * @since 1.4
538 */
539 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
540
541 /**
542 * Weak bidirectional character type "BN" in the Unicode specification.
543 * @since 1.4
544 */
545 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
546
547 /**
548 * Neutral bidirectional character type "B" in the Unicode specification.
549 * @since 1.4
550 */
551 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
552
553 /**
554 * Neutral bidirectional character type "S" in the Unicode specification.
555 * @since 1.4
556 */
557 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
558
559 /**
560 * Neutral bidirectional character type "WS" in the Unicode specification.
561 * @since 1.4
562 */
563 public static final byte DIRECTIONALITY_WHITESPACE = 12;
564
565 /**
566 * Neutral bidirectional character type "ON" in the Unicode specification.
567 * @since 1.4
568 */
569 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
570
571 /**
572 * Strong bidirectional character type "LRE" in the Unicode specification.
573 * @since 1.4
574 */
575 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
576
577 /**
578 * Strong bidirectional character type "LRO" in the Unicode specification.
579 * @since 1.4
580 */
581 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
582
583 /**
584 * Strong bidirectional character type "RLE" in the Unicode specification.
585 * @since 1.4
586 */
587 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
588
589 /**
590 * Strong bidirectional character type "RLO" in the Unicode specification.
591 * @since 1.4
592 */
593 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
594
595 /**
596 * Weak bidirectional character type "PDF" in the Unicode specification.
597 * @since 1.4
598 */
599 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
600
601 /**
602 * Weak bidirectional character type "LRI" in the Unicode specification.
603 * @since 9
604 */
605 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19;
606
607 /**
608 * Weak bidirectional character type "RLI" in the Unicode specification.
609 * @since 9
610 */
611 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20;
612
613 /**
614 * Weak bidirectional character type "FSI" in the Unicode specification.
615 * @since 9
616 */
617 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21;
618
619 /**
620 * Weak bidirectional character type "PDI" in the Unicode specification.
621 * @since 9
622 */
623 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22;
624
625 /**
626 * The minimum value of a
627 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
628 * Unicode high-surrogate code unit</a>
629 * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
630 * A high-surrogate is also known as a <i>leading-surrogate</i>.
631 *
632 * @since 1.5
633 */
634 public static final char MIN_HIGH_SURROGATE = '\uD800';
635
636 /**
637 * The maximum value of a
638 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
639 * Unicode high-surrogate code unit</a>
640 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
641 * A high-surrogate is also known as a <i>leading-surrogate</i>.
642 *
643 * @since 1.5
644 */
645 public static final char MAX_HIGH_SURROGATE = '\uDBFF';
646
647 /**
648 * The minimum value of a
649 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
650 * Unicode low-surrogate code unit</a>
651 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
652 * A low-surrogate is also known as a <i>trailing-surrogate</i>.
653 *
654 * @since 1.5
655 */
656 public static final char MIN_LOW_SURROGATE = '\uDC00';
657
658 /**
659 * The maximum value of a
660 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
661 * Unicode low-surrogate code unit</a>
662 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
663 * A low-surrogate is also known as a <i>trailing-surrogate</i>.
664 *
665 * @since 1.5
666 */
667 public static final char MAX_LOW_SURROGATE = '\uDFFF';
668
669 /**
670 * The minimum value of a Unicode surrogate code unit in the
671 * UTF-16 encoding, constant {@code '\u005CuD800'}.
672 *
673 * @since 1.5
674 */
675 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
676
677 /**
678 * The maximum value of a Unicode surrogate code unit in the
679 * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
680 *
681 * @since 1.5
682 */
683 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
684
685 /**
686 * The minimum value of a
687 * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
688 * Unicode supplementary code point</a>, constant {@code U+10000}.
689 *
690 * @since 1.5
691 */
692 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
693
694 /**
695 * The minimum value of a
696 * <a href="http://www.unicode.org/glossary/#code_point">
697 * Unicode code point</a>, constant {@code U+0000}.
698 *
699 * @since 1.5
700 */
701 public static final int MIN_CODE_POINT = 0x000000;
702
703 /**
704 * The maximum value of a
705 * <a href="http://www.unicode.org/glossary/#code_point">
706 * Unicode code point</a>, constant {@code U+10FFFF}.
707 *
708 * @since 1.5
709 */
710 public static final int MAX_CODE_POINT = 0X10FFFF;
711
712 /**
713 * Returns an {@link Optional} containing the nominal descriptor for this
714 * instance.
715 *
716 * @return an {@link Optional} describing the {@linkplain Character} instance
717 * @since 15
718 */
719 @Override
720 public Optional<DynamicConstantDesc<Character>> describeConstable() {
721 return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value));
722 }
723
724 /**
725 * Instances of this class represent particular subsets of the Unicode
726 * character set. The only family of subsets defined in the
727 * {@code Character} class is {@link Character.UnicodeBlock}.
728 * Other portions of the Java API may define other subsets for their
729 * own purposes.
730 *
731 * @since 1.2
732 */
733 public static class Subset {
734
735 private String name;
736
737 /**
738 * Constructs a new {@code Subset} instance.
739 *
740 * @param name The name of this subset
741 * @throws NullPointerException if name is {@code null}
742 */
743 protected Subset(String name) {
744 if (name == null) {
745 throw new NullPointerException("name");
746 }
747 this.name = name;
748 }
749
750 /**
751 * Compares two {@code Subset} objects for equality.
752 * This method returns {@code true} if and only if
753 * {@code this} and the argument refer to the same
754 * object; since this method is {@code final}, this
755 * guarantee holds for all subclasses.
756 */
757 public final boolean equals(Object obj) {
758 return (this == obj);
759 }
760
761 /**
762 * Returns the standard hash code as defined by the
763 * {@link Object#hashCode} method. This method
764 * is {@code final} in order to ensure that the
765 * {@code equals} and {@code hashCode} methods will
766 * be consistent in all subclasses.
767 */
768 public final int hashCode() {
769 return super.hashCode();
770 }
771
772 /**
773 * Returns the name of this subset.
774 */
775 public final String toString() {
776 return name;
777 }
778 }
779
780 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
781 // for the latest specification of Unicode Blocks.
782
783 /**
784 * A family of character subsets representing the character blocks in the
785 * Unicode specification. Character blocks generally define characters
786 * used for a specific script or purpose. A character is contained by
787 * at most one Unicode block.
788 *
789 * @since 1.2
790 */
791 public static final class UnicodeBlock extends Subset {
/**
 * NUM_ENTITIES should match the total number of UnicodeBlock identifier
 * names plus their aliases.
 * It should be adjusted whenever the Unicode Character Database
 * is upgraded.
 */
private static final int NUM_ENTITIES = 804;

/**
 * Lookup table from each registered name (an identifier name or one of
 * its aliases) to the corresponding UnicodeBlock. The constructors of
 * this class add the entries; the table is created sized for
 * NUM_ENTITIES mappings.
 */
private static Map<String, UnicodeBlock> map = HashMap.newHashMap(NUM_ENTITIES);
800
/**
 * Creates a UnicodeBlock that is registered under its identifier name only.
 * The given name must be the same as the block identifier; it is also
 * passed to the {@code Subset} constructor as the subset name.
 *
 * @param idName the identifier name of the block
 */
private UnicodeBlock(String idName) {
    super(idName);
    // Register this block under its identifier name.
    UnicodeBlock.map.put(idName, this);
}
809
/**
 * Creates a UnicodeBlock that is registered under both its identifier
 * name and a single alias name.
 *
 * @param idName the identifier name of the block
 * @param alias  an additional name that maps to this block
 */
private UnicodeBlock(String idName, String alias) {
    // The one-argument constructor registers the identifier name.
    this(idName);
    UnicodeBlock.map.put(alias, this);
}
818
/**
 * Creates a UnicodeBlock that is registered under its identifier name
 * and under every one of the given alias names.
 *
 * @param idName  the identifier name of the block
 * @param aliases additional names that each map to this block
 */
private UnicodeBlock(String idName, String... aliases) {
    // The one-argument constructor registers the identifier name.
    this(idName);
    for (int i = 0; i < aliases.length; i++) {
        map.put(aliases[i], this);
    }
}
828
829 /**
830 * Constant for the "Basic Latin" Unicode character block.
831 * @since 1.2
832 */
833 public static final UnicodeBlock BASIC_LATIN =
834 new UnicodeBlock("BASIC_LATIN",
835 "BASIC LATIN",
836 "BASICLATIN");
837
838 /**
839 * Constant for the "Latin-1 Supplement" Unicode character block.
840 * @since 1.2
841 */
842 public static final UnicodeBlock LATIN_1_SUPPLEMENT =
843 new UnicodeBlock("LATIN_1_SUPPLEMENT",
844 "LATIN-1 SUPPLEMENT",
845 "LATIN-1SUPPLEMENT");
846
847 /**
848 * Constant for the "Latin Extended-A" Unicode character block.
849 * @since 1.2
850 */
851 public static final UnicodeBlock LATIN_EXTENDED_A =
852 new UnicodeBlock("LATIN_EXTENDED_A",
853 "LATIN EXTENDED-A",
854 "LATINEXTENDED-A");
855
856 /**
857 * Constant for the "Latin Extended-B" Unicode character block.
858 * @since 1.2
859 */
860 public static final UnicodeBlock LATIN_EXTENDED_B =
861 new UnicodeBlock("LATIN_EXTENDED_B",
862 "LATIN EXTENDED-B",
863 "LATINEXTENDED-B");
864
865 /**
866 * Constant for the "IPA Extensions" Unicode character block.
867 * @since 1.2
868 */
869 public static final UnicodeBlock IPA_EXTENSIONS =
870 new UnicodeBlock("IPA_EXTENSIONS",
871 "IPA EXTENSIONS",
872 "IPAEXTENSIONS");
873
874 /**
875 * Constant for the "Spacing Modifier Letters" Unicode character block.
876 * @since 1.2
877 */
878 public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
879 new UnicodeBlock("SPACING_MODIFIER_LETTERS",
880 "SPACING MODIFIER LETTERS",
881 "SPACINGMODIFIERLETTERS");
882
883 /**
884 * Constant for the "Combining Diacritical Marks" Unicode character block.
885 * @since 1.2
886 */
887 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
888 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
889 "COMBINING DIACRITICAL MARKS",
890 "COMBININGDIACRITICALMARKS");
891
892 /**
893 * Constant for the "Greek and Coptic" Unicode character block.
894 * <p>
895 * This block was previously known as the "Greek" block.
896 *
897 * @since 1.2
898 */
899 public static final UnicodeBlock GREEK =
900 new UnicodeBlock("GREEK",
901 "GREEK AND COPTIC",
902 "GREEKANDCOPTIC");
903
904 /**
905 * Constant for the "Cyrillic" Unicode character block.
906 * @since 1.2
907 */
908 public static final UnicodeBlock CYRILLIC =
909 new UnicodeBlock("CYRILLIC");
910
911 /**
912 * Constant for the "Armenian" Unicode character block.
913 * @since 1.2
914 */
915 public static final UnicodeBlock ARMENIAN =
916 new UnicodeBlock("ARMENIAN");
917
918 /**
919 * Constant for the "Hebrew" Unicode character block.
920 * @since 1.2
921 */
922 public static final UnicodeBlock HEBREW =
923 new UnicodeBlock("HEBREW");
924
925 /**
926 * Constant for the "Arabic" Unicode character block.
927 * @since 1.2
928 */
929 public static final UnicodeBlock ARABIC =
930 new UnicodeBlock("ARABIC");
931
932 /**
933 * Constant for the "Devanagari" Unicode character block.
934 * @since 1.2
935 */
936 public static final UnicodeBlock DEVANAGARI =
937 new UnicodeBlock("DEVANAGARI");
938
939 /**
940 * Constant for the "Bengali" Unicode character block.
941 * @since 1.2
942 */
943 public static final UnicodeBlock BENGALI =
944 new UnicodeBlock("BENGALI");
945
946 /**
947 * Constant for the "Gurmukhi" Unicode character block.
948 * @since 1.2
949 */
950 public static final UnicodeBlock GURMUKHI =
951 new UnicodeBlock("GURMUKHI");
952
953 /**
954 * Constant for the "Gujarati" Unicode character block.
955 * @since 1.2
956 */
957 public static final UnicodeBlock GUJARATI =
958 new UnicodeBlock("GUJARATI");
959
960 /**
961 * Constant for the "Oriya" Unicode character block.
962 * @since 1.2
963 */
964 public static final UnicodeBlock ORIYA =
965 new UnicodeBlock("ORIYA");
966
967 /**
968 * Constant for the "Tamil" Unicode character block.
969 * @since 1.2
970 */
971 public static final UnicodeBlock TAMIL =
972 new UnicodeBlock("TAMIL");
973
974 /**
975 * Constant for the "Telugu" Unicode character block.
976 * @since 1.2
977 */
978 public static final UnicodeBlock TELUGU =
979 new UnicodeBlock("TELUGU");
980
981 /**
982 * Constant for the "Kannada" Unicode character block.
983 * @since 1.2
984 */
985 public static final UnicodeBlock KANNADA =
986 new UnicodeBlock("KANNADA");
987
988 /**
989 * Constant for the "Malayalam" Unicode character block.
990 * @since 1.2
991 */
992 public static final UnicodeBlock MALAYALAM =
993 new UnicodeBlock("MALAYALAM");
994
995 /**
996 * Constant for the "Thai" Unicode character block.
997 * @since 1.2
998 */
999 public static final UnicodeBlock THAI =
1000 new UnicodeBlock("THAI");
1001
1002 /**
1003 * Constant for the "Lao" Unicode character block.
1004 * @since 1.2
1005 */
1006 public static final UnicodeBlock LAO =
1007 new UnicodeBlock("LAO");
1008
1009 /**
1010 * Constant for the "Tibetan" Unicode character block.
1011 * @since 1.2
1012 */
1013 public static final UnicodeBlock TIBETAN =
1014 new UnicodeBlock("TIBETAN");
1015
1016 /**
1017 * Constant for the "Georgian" Unicode character block.
1018 * @since 1.2
1019 */
1020 public static final UnicodeBlock GEORGIAN =
1021 new UnicodeBlock("GEORGIAN");
1022
1023 /**
1024 * Constant for the "Hangul Jamo" Unicode character block.
1025 * @since 1.2
1026 */
1027 public static final UnicodeBlock HANGUL_JAMO =
1028 new UnicodeBlock("HANGUL_JAMO",
1029 "HANGUL JAMO",
1030 "HANGULJAMO");
1031
1032 /**
1033 * Constant for the "Latin Extended Additional" Unicode character block.
1034 * @since 1.2
1035 */
1036 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
1037 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
1038 "LATIN EXTENDED ADDITIONAL",
1039 "LATINEXTENDEDADDITIONAL");
1040
1041 /**
1042 * Constant for the "Greek Extended" Unicode character block.
1043 * @since 1.2
1044 */
1045 public static final UnicodeBlock GREEK_EXTENDED =
1046 new UnicodeBlock("GREEK_EXTENDED",
1047 "GREEK EXTENDED",
1048 "GREEKEXTENDED");
1049
1050 /**
1051 * Constant for the "General Punctuation" Unicode character block.
1052 * @since 1.2
1053 */
1054 public static final UnicodeBlock GENERAL_PUNCTUATION =
1055 new UnicodeBlock("GENERAL_PUNCTUATION",
1056 "GENERAL PUNCTUATION",
1057 "GENERALPUNCTUATION");
1058
1059 /**
1060 * Constant for the "Superscripts and Subscripts" Unicode character
1061 * block.
1062 * @since 1.2
1063 */
1064 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
1065 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
1066 "SUPERSCRIPTS AND SUBSCRIPTS",
1067 "SUPERSCRIPTSANDSUBSCRIPTS");
1068
1069 /**
1070 * Constant for the "Currency Symbols" Unicode character block.
1071 * @since 1.2
1072 */
1073 public static final UnicodeBlock CURRENCY_SYMBOLS =
1074 new UnicodeBlock("CURRENCY_SYMBOLS",
1075 "CURRENCY SYMBOLS",
1076 "CURRENCYSYMBOLS");
1077
1078 /**
1079 * Constant for the "Combining Diacritical Marks for Symbols" Unicode
1080 * character block.
1081 * <p>
1082 * This block was previously known as "Combining Marks for Symbols".
1083 * @since 1.2
1084 */
1085 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
1086 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
1087 "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
1088 "COMBININGDIACRITICALMARKSFORSYMBOLS",
1089 "COMBINING MARKS FOR SYMBOLS",
1090 "COMBININGMARKSFORSYMBOLS");
1091
1092 /**
1093 * Constant for the "Letterlike Symbols" Unicode character block.
1094 * @since 1.2
1095 */
1096 public static final UnicodeBlock LETTERLIKE_SYMBOLS =
1097 new UnicodeBlock("LETTERLIKE_SYMBOLS",
1098 "LETTERLIKE SYMBOLS",
1099 "LETTERLIKESYMBOLS");
1100
1101 /**
1102 * Constant for the "Number Forms" Unicode character block.
1103 * @since 1.2
1104 */
1105 public static final UnicodeBlock NUMBER_FORMS =
1106 new UnicodeBlock("NUMBER_FORMS",
1107 "NUMBER FORMS",
1108 "NUMBERFORMS");
1109
1110 /**
1111 * Constant for the "Arrows" Unicode character block.
1112 * @since 1.2
1113 */
1114 public static final UnicodeBlock ARROWS =
1115 new UnicodeBlock("ARROWS");
1116
1117 /**
1118 * Constant for the "Mathematical Operators" Unicode character block.
1119 * @since 1.2
1120 */
1121 public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1122 new UnicodeBlock("MATHEMATICAL_OPERATORS",
1123 "MATHEMATICAL OPERATORS",
1124 "MATHEMATICALOPERATORS");
1125
1126 /**
1127 * Constant for the "Miscellaneous Technical" Unicode character block.
1128 * @since 1.2
1129 */
1130 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1131 new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1132 "MISCELLANEOUS TECHNICAL",
1133 "MISCELLANEOUSTECHNICAL");
1134
1135 /**
1136 * Constant for the "Control Pictures" Unicode character block.
1137 * @since 1.2
1138 */
1139 public static final UnicodeBlock CONTROL_PICTURES =
1140 new UnicodeBlock("CONTROL_PICTURES",
1141 "CONTROL PICTURES",
1142 "CONTROLPICTURES");
1143
1144 /**
1145 * Constant for the "Optical Character Recognition" Unicode character block.
1146 * @since 1.2
1147 */
1148 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1149 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1150 "OPTICAL CHARACTER RECOGNITION",
1151 "OPTICALCHARACTERRECOGNITION");
1152
1153 /**
1154 * Constant for the "Enclosed Alphanumerics" Unicode character block.
1155 * @since 1.2
1156 */
1157 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1158 new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1159 "ENCLOSED ALPHANUMERICS",
1160 "ENCLOSEDALPHANUMERICS");
1161
1162 /**
1163 * Constant for the "Box Drawing" Unicode character block.
1164 * @since 1.2
1165 */
1166 public static final UnicodeBlock BOX_DRAWING =
1167 new UnicodeBlock("BOX_DRAWING",
1168 "BOX DRAWING",
1169 "BOXDRAWING");
1170
1171 /**
1172 * Constant for the "Block Elements" Unicode character block.
1173 * @since 1.2
1174 */
1175 public static final UnicodeBlock BLOCK_ELEMENTS =
1176 new UnicodeBlock("BLOCK_ELEMENTS",
1177 "BLOCK ELEMENTS",
1178 "BLOCKELEMENTS");
1179
1180 /**
1181 * Constant for the "Geometric Shapes" Unicode character block.
1182 * @since 1.2
1183 */
1184 public static final UnicodeBlock GEOMETRIC_SHAPES =
1185 new UnicodeBlock("GEOMETRIC_SHAPES",
1186 "GEOMETRIC SHAPES",
1187 "GEOMETRICSHAPES");
1188
1189 /**
1190 * Constant for the "Miscellaneous Symbols" Unicode character block.
1191 * @since 1.2
1192 */
1193 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1194 new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1195 "MISCELLANEOUS SYMBOLS",
1196 "MISCELLANEOUSSYMBOLS");
1197
1198 /**
1199 * Constant for the "Dingbats" Unicode character block.
1200 * @since 1.2
1201 */
1202 public static final UnicodeBlock DINGBATS =
1203 new UnicodeBlock("DINGBATS");
1204
1205 /**
1206 * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1207 * @since 1.2
1208 */
1209 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1210 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1211 "CJK SYMBOLS AND PUNCTUATION",
1212 "CJKSYMBOLSANDPUNCTUATION");
1213
1214 /**
1215 * Constant for the "Hiragana" Unicode character block.
1216 * @since 1.2
1217 */
1218 public static final UnicodeBlock HIRAGANA =
1219 new UnicodeBlock("HIRAGANA");
1220
1221 /**
1222 * Constant for the "Katakana" Unicode character block.
1223 * @since 1.2
1224 */
1225 public static final UnicodeBlock KATAKANA =
1226 new UnicodeBlock("KATAKANA");
1227
1228 /**
1229 * Constant for the "Bopomofo" Unicode character block.
1230 * @since 1.2
1231 */
1232 public static final UnicodeBlock BOPOMOFO =
1233 new UnicodeBlock("BOPOMOFO");
1234
1235 /**
1236 * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1237 * @since 1.2
1238 */
1239 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1240 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1241 "HANGUL COMPATIBILITY JAMO",
1242 "HANGULCOMPATIBILITYJAMO");
1243
1244 /**
1245 * Constant for the "Kanbun" Unicode character block.
1246 * @since 1.2
1247 */
1248 public static final UnicodeBlock KANBUN =
1249 new UnicodeBlock("KANBUN");
1250
1251 /**
1252 * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1253 * @since 1.2
1254 */
1255 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1256 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1257 "ENCLOSED CJK LETTERS AND MONTHS",
1258 "ENCLOSEDCJKLETTERSANDMONTHS");
1259
1260 /**
1261 * Constant for the "CJK Compatibility" Unicode character block.
1262 * @since 1.2
1263 */
1264 public static final UnicodeBlock CJK_COMPATIBILITY =
1265 new UnicodeBlock("CJK_COMPATIBILITY",
1266 "CJK COMPATIBILITY",
1267 "CJKCOMPATIBILITY");
1268
1269 /**
1270 * Constant for the "CJK Unified Ideographs" Unicode character block.
1271 * @since 1.2
1272 */
1273 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1274 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1275 "CJK UNIFIED IDEOGRAPHS",
1276 "CJKUNIFIEDIDEOGRAPHS");
1277
1278 /**
1279 * Constant for the "Hangul Syllables" Unicode character block.
1280 * @since 1.2
1281 */
1282 public static final UnicodeBlock HANGUL_SYLLABLES =
1283 new UnicodeBlock("HANGUL_SYLLABLES",
1284 "HANGUL SYLLABLES",
1285 "HANGULSYLLABLES");
1286
1287 /**
1288 * Constant for the "Private Use Area" Unicode character block.
1289 * @since 1.2
1290 */
1291 public static final UnicodeBlock PRIVATE_USE_AREA =
1292 new UnicodeBlock("PRIVATE_USE_AREA",
1293 "PRIVATE USE AREA",
1294 "PRIVATEUSEAREA");
1295
1296 /**
1297 * Constant for the "CJK Compatibility Ideographs" Unicode character
1298 * block.
1299 * @since 1.2
1300 */
1301 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1302 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1303 "CJK COMPATIBILITY IDEOGRAPHS",
1304 "CJKCOMPATIBILITYIDEOGRAPHS");
1305
1306 /**
1307 * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1308 * @since 1.2
1309 */
1310 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1311 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1312 "ALPHABETIC PRESENTATION FORMS",
1313 "ALPHABETICPRESENTATIONFORMS");
1314
1315 /**
1316 * Constant for the "Arabic Presentation Forms-A" Unicode character
1317 * block.
1318 * @since 1.2
1319 */
1320 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1321 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1322 "ARABIC PRESENTATION FORMS-A",
1323 "ARABICPRESENTATIONFORMS-A");
1324
1325 /**
1326 * Constant for the "Combining Half Marks" Unicode character block.
1327 * @since 1.2
1328 */
1329 public static final UnicodeBlock COMBINING_HALF_MARKS =
1330 new UnicodeBlock("COMBINING_HALF_MARKS",
1331 "COMBINING HALF MARKS",
1332 "COMBININGHALFMARKS");
1333
1334 /**
1335 * Constant for the "CJK Compatibility Forms" Unicode character block.
1336 * @since 1.2
1337 */
1338 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1339 new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1340 "CJK COMPATIBILITY FORMS",
1341 "CJKCOMPATIBILITYFORMS");
1342
1343 /**
1344 * Constant for the "Small Form Variants" Unicode character block.
1345 * @since 1.2
1346 */
1347 public static final UnicodeBlock SMALL_FORM_VARIANTS =
1348 new UnicodeBlock("SMALL_FORM_VARIANTS",
1349 "SMALL FORM VARIANTS",
1350 "SMALLFORMVARIANTS");
1351
1352 /**
1353 * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1354 * @since 1.2
1355 */
1356 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1357 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1358 "ARABIC PRESENTATION FORMS-B",
1359 "ARABICPRESENTATIONFORMS-B");
1360
1361 /**
1362 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1363 * block.
1364 * @since 1.2
1365 */
1366 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1367 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1368 "HALFWIDTH AND FULLWIDTH FORMS",
1369 "HALFWIDTHANDFULLWIDTHFORMS");
1370
1371 /**
1372 * Constant for the "Specials" Unicode character block.
1373 * @since 1.2
1374 */
1375 public static final UnicodeBlock SPECIALS =
1376 new UnicodeBlock("SPECIALS");
1377
1378 /**
1379 * @deprecated
1380 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES},
1381 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}.
1382 * These constants match the block definitions of the Unicode Standard.
1383 * The {@link #of(char)} and {@link #of(int)} methods return the
1384 * standard constants.
1385 */
1386 @Deprecated(since="1.5")
1387 public static final UnicodeBlock SURROGATES_AREA =
1388 new UnicodeBlock("SURROGATES_AREA");
1389
1390 /**
1391 * Constant for the "Syriac" Unicode character block.
1392 * @since 1.4
1393 */
1394 public static final UnicodeBlock SYRIAC =
1395 new UnicodeBlock("SYRIAC");
1396
1397 /**
1398 * Constant for the "Thaana" Unicode character block.
1399 * @since 1.4
1400 */
1401 public static final UnicodeBlock THAANA =
1402 new UnicodeBlock("THAANA");
1403
1404 /**
1405 * Constant for the "Sinhala" Unicode character block.
1406 * @since 1.4
1407 */
1408 public static final UnicodeBlock SINHALA =
1409 new UnicodeBlock("SINHALA");
1410
1411 /**
1412 * Constant for the "Myanmar" Unicode character block.
1413 * @since 1.4
1414 */
1415 public static final UnicodeBlock MYANMAR =
1416 new UnicodeBlock("MYANMAR");
1417
1418 /**
1419 * Constant for the "Ethiopic" Unicode character block.
1420 * @since 1.4
1421 */
1422 public static final UnicodeBlock ETHIOPIC =
1423 new UnicodeBlock("ETHIOPIC");
1424
1425 /**
1426 * Constant for the "Cherokee" Unicode character block.
1427 * @since 1.4
1428 */
1429 public static final UnicodeBlock CHEROKEE =
1430 new UnicodeBlock("CHEROKEE");
1431
1432 /**
1433 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1434 * @since 1.4
1435 */
1436 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1437 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1438 "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1439 "UNIFIEDCANADIANABORIGINALSYLLABICS");
1440
1441 /**
1442 * Constant for the "Ogham" Unicode character block.
1443 * @since 1.4
1444 */
1445 public static final UnicodeBlock OGHAM =
1446 new UnicodeBlock("OGHAM");
1447
1448 /**
1449 * Constant for the "Runic" Unicode character block.
1450 * @since 1.4
1451 */
1452 public static final UnicodeBlock RUNIC =
1453 new UnicodeBlock("RUNIC");
1454
1455 /**
1456 * Constant for the "Khmer" Unicode character block.
1457 * @since 1.4
1458 */
1459 public static final UnicodeBlock KHMER =
1460 new UnicodeBlock("KHMER");
1461
1462 /**
1463 * Constant for the "Mongolian" Unicode character block.
1464 * @since 1.4
1465 */
1466 public static final UnicodeBlock MONGOLIAN =
1467 new UnicodeBlock("MONGOLIAN");
1468
1469 /**
1470 * Constant for the "Braille Patterns" Unicode character block.
1471 * @since 1.4
1472 */
1473 public static final UnicodeBlock BRAILLE_PATTERNS =
1474 new UnicodeBlock("BRAILLE_PATTERNS",
1475 "BRAILLE PATTERNS",
1476 "BRAILLEPATTERNS");
1477
1478 /**
1479 * Constant for the "CJK Radicals Supplement" Unicode character block.
1480 * @since 1.4
1481 */
1482 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1483 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1484 "CJK RADICALS SUPPLEMENT",
1485 "CJKRADICALSSUPPLEMENT");
1486
1487 /**
1488 * Constant for the "Kangxi Radicals" Unicode character block.
1489 * @since 1.4
1490 */
1491 public static final UnicodeBlock KANGXI_RADICALS =
1492 new UnicodeBlock("KANGXI_RADICALS",
1493 "KANGXI RADICALS",
1494 "KANGXIRADICALS");
1495
1496 /**
1497 * Constant for the "Ideographic Description Characters" Unicode character block.
1498 * @since 1.4
1499 */
1500 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1501 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1502 "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1503 "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1504
1505 /**
1506 * Constant for the "Bopomofo Extended" Unicode character block.
1507 * @since 1.4
1508 */
1509 public static final UnicodeBlock BOPOMOFO_EXTENDED =
1510 new UnicodeBlock("BOPOMOFO_EXTENDED",
1511 "BOPOMOFO EXTENDED",
1512 "BOPOMOFOEXTENDED");
1513
1514 /**
1515 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1516 * @since 1.4
1517 */
1518 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1519 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1520 "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1521 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1522
1523 /**
1524 * Constant for the "Yi Syllables" Unicode character block.
1525 * @since 1.4
1526 */
1527 public static final UnicodeBlock YI_SYLLABLES =
1528 new UnicodeBlock("YI_SYLLABLES",
1529 "YI SYLLABLES",
1530 "YISYLLABLES");
1531
1532 /**
1533 * Constant for the "Yi Radicals" Unicode character block.
1534 * @since 1.4
1535 */
1536 public static final UnicodeBlock YI_RADICALS =
1537 new UnicodeBlock("YI_RADICALS",
1538 "YI RADICALS",
1539 "YIRADICALS");
1540
1541 /**
1542 * Constant for the "Cyrillic Supplement" Unicode character block.
1543 * This block was previously known as the "Cyrillic Supplementary" block.
1544 * @since 1.5
1545 */
1546 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1547 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1548 "CYRILLIC SUPPLEMENTARY",
1549 "CYRILLICSUPPLEMENTARY",
1550 "CYRILLIC SUPPLEMENT",
1551 "CYRILLICSUPPLEMENT");
1552
1553 /**
1554 * Constant for the "Tagalog" Unicode character block.
1555 * @since 1.5
1556 */
1557 public static final UnicodeBlock TAGALOG =
1558 new UnicodeBlock("TAGALOG");
1559
1560 /**
1561 * Constant for the "Hanunoo" Unicode character block.
1562 * @since 1.5
1563 */
1564 public static final UnicodeBlock HANUNOO =
1565 new UnicodeBlock("HANUNOO");
1566
1567 /**
1568 * Constant for the "Buhid" Unicode character block.
1569 * @since 1.5
1570 */
1571 public static final UnicodeBlock BUHID =
1572 new UnicodeBlock("BUHID");
1573
1574 /**
1575 * Constant for the "Tagbanwa" Unicode character block.
1576 * @since 1.5
1577 */
1578 public static final UnicodeBlock TAGBANWA =
1579 new UnicodeBlock("TAGBANWA");
1580
1581 /**
1582 * Constant for the "Limbu" Unicode character block.
1583 * @since 1.5
1584 */
1585 public static final UnicodeBlock LIMBU =
1586 new UnicodeBlock("LIMBU");
1587
1588 /**
1589 * Constant for the "Tai Le" Unicode character block.
1590 * @since 1.5
1591 */
1592 public static final UnicodeBlock TAI_LE =
1593 new UnicodeBlock("TAI_LE",
1594 "TAI LE",
1595 "TAILE");
1596
1597 /**
1598 * Constant for the "Khmer Symbols" Unicode character block.
1599 * @since 1.5
1600 */
1601 public static final UnicodeBlock KHMER_SYMBOLS =
1602 new UnicodeBlock("KHMER_SYMBOLS",
1603 "KHMER SYMBOLS",
1604 "KHMERSYMBOLS");
1605
1606 /**
1607 * Constant for the "Phonetic Extensions" Unicode character block.
1608 * @since 1.5
1609 */
1610 public static final UnicodeBlock PHONETIC_EXTENSIONS =
1611 new UnicodeBlock("PHONETIC_EXTENSIONS",
1612 "PHONETIC EXTENSIONS",
1613 "PHONETICEXTENSIONS");
1614
1615 /**
1616 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1617 * @since 1.5
1618 */
1619 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1620 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1621 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1622 "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1623
1624 /**
1625 * Constant for the "Supplemental Arrows-A" Unicode character block.
1626 * @since 1.5
1627 */
1628 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1629 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1630 "SUPPLEMENTAL ARROWS-A",
1631 "SUPPLEMENTALARROWS-A");
1632
1633 /**
1634 * Constant for the "Supplemental Arrows-B" Unicode character block.
1635 * @since 1.5
1636 */
1637 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1638 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1639 "SUPPLEMENTAL ARROWS-B",
1640 "SUPPLEMENTALARROWS-B");
1641
1642 /**
1643 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1644 * character block.
1645 * @since 1.5
1646 */
1647 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1648 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1649 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1650 "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1651
1652 /**
1653 * Constant for the "Supplemental Mathematical Operators" Unicode
1654 * character block.
1655 * @since 1.5
1656 */
1657 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1658 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1659 "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1660 "SUPPLEMENTALMATHEMATICALOPERATORS");
1661
1662 /**
1663 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1664 * block.
1665 * @since 1.5
1666 */
1667 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1668 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1669 "MISCELLANEOUS SYMBOLS AND ARROWS",
1670 "MISCELLANEOUSSYMBOLSANDARROWS");
1671
1672 /**
1673 * Constant for the "Katakana Phonetic Extensions" Unicode character
1674 * block.
1675 * @since 1.5
1676 */
1677 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1678 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1679 "KATAKANA PHONETIC EXTENSIONS",
1680 "KATAKANAPHONETICEXTENSIONS");
1681
1682 /**
1683 * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1684 * @since 1.5
1685 */
1686 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1687 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1688 "YIJING HEXAGRAM SYMBOLS",
1689 "YIJINGHEXAGRAMSYMBOLS");
1690
1691 /**
1692 * Constant for the "Variation Selectors" Unicode character block.
1693 * @since 1.5
1694 */
1695 public static final UnicodeBlock VARIATION_SELECTORS =
1696 new UnicodeBlock("VARIATION_SELECTORS",
1697 "VARIATION SELECTORS",
1698 "VARIATIONSELECTORS");
1699
1700 /**
1701 * Constant for the "Linear B Syllabary" Unicode character block.
1702 * @since 1.5
1703 */
1704 public static final UnicodeBlock LINEAR_B_SYLLABARY =
1705 new UnicodeBlock("LINEAR_B_SYLLABARY",
1706 "LINEAR B SYLLABARY",
1707 "LINEARBSYLLABARY");
1708
1709 /**
1710 * Constant for the "Linear B Ideograms" Unicode character block.
1711 * @since 1.5
1712 */
1713 public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1714 new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1715 "LINEAR B IDEOGRAMS",
1716 "LINEARBIDEOGRAMS");
1717
1718 /**
1719 * Constant for the "Aegean Numbers" Unicode character block.
1720 * @since 1.5
1721 */
1722 public static final UnicodeBlock AEGEAN_NUMBERS =
1723 new UnicodeBlock("AEGEAN_NUMBERS",
1724 "AEGEAN NUMBERS",
1725 "AEGEANNUMBERS");
1726
1727 /**
1728 * Constant for the "Old Italic" Unicode character block.
1729 * @since 1.5
1730 */
1731 public static final UnicodeBlock OLD_ITALIC =
1732 new UnicodeBlock("OLD_ITALIC",
1733 "OLD ITALIC",
1734 "OLDITALIC");
1735
1736 /**
1737 * Constant for the "Gothic" Unicode character block.
1738 * @since 1.5
1739 */
1740 public static final UnicodeBlock GOTHIC =
1741 new UnicodeBlock("GOTHIC");
1742
1743 /**
1744 * Constant for the "Ugaritic" Unicode character block.
1745 * @since 1.5
1746 */
1747 public static final UnicodeBlock UGARITIC =
1748 new UnicodeBlock("UGARITIC");
1749
1750 /**
1751 * Constant for the "Deseret" Unicode character block.
1752 * @since 1.5
1753 */
1754 public static final UnicodeBlock DESERET =
1755 new UnicodeBlock("DESERET");
1756
1757 /**
1758 * Constant for the "Shavian" Unicode character block.
1759 * @since 1.5
1760 */
1761 public static final UnicodeBlock SHAVIAN =
1762 new UnicodeBlock("SHAVIAN");
1763
1764 /**
1765 * Constant for the "Osmanya" Unicode character block.
1766 * @since 1.5
1767 */
1768 public static final UnicodeBlock OSMANYA =
1769 new UnicodeBlock("OSMANYA");
1770
1771 /**
1772 * Constant for the "Cypriot Syllabary" Unicode character block.
1773 * @since 1.5
1774 */
1775 public static final UnicodeBlock CYPRIOT_SYLLABARY =
1776 new UnicodeBlock("CYPRIOT_SYLLABARY",
1777 "CYPRIOT SYLLABARY",
1778 "CYPRIOTSYLLABARY");
1779
1780 /**
1781 * Constant for the "Byzantine Musical Symbols" Unicode character block.
1782 * @since 1.5
1783 */
1784 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1785 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1786 "BYZANTINE MUSICAL SYMBOLS",
1787 "BYZANTINEMUSICALSYMBOLS");
1788
1789 /**
1790 * Constant for the "Musical Symbols" Unicode character block.
1791 * @since 1.5
1792 */
1793 public static final UnicodeBlock MUSICAL_SYMBOLS =
1794 new UnicodeBlock("MUSICAL_SYMBOLS",
1795 "MUSICAL SYMBOLS",
1796 "MUSICALSYMBOLS");
1797
1798 /**
1799 * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1800 * @since 1.5
1801 */
1802 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1803 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1804 "TAI XUAN JING SYMBOLS",
1805 "TAIXUANJINGSYMBOLS");
1806
1807 /**
1808 * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1809 * character block.
1810 * @since 1.5
1811 */
1812 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1813 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1814 "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1815 "MATHEMATICALALPHANUMERICSYMBOLS");
1816
1817 /**
1818 * Constant for the "CJK Unified Ideographs Extension B" Unicode
1819 * character block.
1820 * @since 1.5
1821 */
1822 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1823 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1824 "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1825 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1826
1827 /**
1828 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1829 * @since 1.5
1830 */
1831 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1832 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1833 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1834 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1835
1836 /**
1837 * Constant for the "Tags" Unicode character block.
1838 * @since 1.5
1839 */
1840 public static final UnicodeBlock TAGS =
1841 new UnicodeBlock("TAGS");
1842
1843 /**
1844 * Constant for the "Variation Selectors Supplement" Unicode character
1845 * block.
1846 * @since 1.5
1847 */
1848 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1849 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1850 "VARIATION SELECTORS SUPPLEMENT",
1851 "VARIATIONSELECTORSSUPPLEMENT");
1852
1853 /**
1854 * Constant for the "Supplementary Private Use Area-A" Unicode character
1855 * block.
1856 * @since 1.5
1857 */
1858 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1859 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1860 "SUPPLEMENTARY PRIVATE USE AREA-A",
1861 "SUPPLEMENTARYPRIVATEUSEAREA-A");
1862
1863 /**
1864 * Constant for the "Supplementary Private Use Area-B" Unicode character
1865 * block.
1866 * @since 1.5
1867 */
1868 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1869 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1870 "SUPPLEMENTARY PRIVATE USE AREA-B",
1871 "SUPPLEMENTARYPRIVATEUSEAREA-B");
1872
1873 /**
1874 * Constant for the "High Surrogates" Unicode character block.
1875 * This block represents codepoint values in the high surrogate
1876 * range: U+D800 through U+DB7F
1877 *
1878 * @since 1.5
1879 */
1880 public static final UnicodeBlock HIGH_SURROGATES =
1881 new UnicodeBlock("HIGH_SURROGATES",
1882 "HIGH SURROGATES",
1883 "HIGHSURROGATES");
1884
1885 /**
1886 * Constant for the "High Private Use Surrogates" Unicode character
1887 * block.
1888 * This block represents codepoint values in the private use high
1889 * surrogate range: U+DB80 through U+DBFF
1890 *
1891 * @since 1.5
1892 */
1893 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1894 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1895 "HIGH PRIVATE USE SURROGATES",
1896 "HIGHPRIVATEUSESURROGATES");
1897
1898 /**
1899 * Constant for the "Low Surrogates" Unicode character block.
1900 * This block represents codepoint values in the low surrogate
1901 * range: U+DC00 through U+DFFF
1902 *
1903 * @since 1.5
1904 */
1905 public static final UnicodeBlock LOW_SURROGATES =
1906 new UnicodeBlock("LOW_SURROGATES",
1907 "LOW SURROGATES",
1908 "LOWSURROGATES");
1909
1910 /**
1911 * Constant for the "Arabic Supplement" Unicode character block.
1912 * @since 1.7
1913 */
1914 public static final UnicodeBlock ARABIC_SUPPLEMENT =
1915 new UnicodeBlock("ARABIC_SUPPLEMENT",
1916 "ARABIC SUPPLEMENT",
1917 "ARABICSUPPLEMENT");
1918
1919 /**
1920 * Constant for the "NKo" Unicode character block.
1921 * @since 1.7
1922 */
1923 public static final UnicodeBlock NKO =
1924 new UnicodeBlock("NKO");
1925
1926 /**
1927 * Constant for the "Samaritan" Unicode character block.
1928 * @since 1.7
1929 */
1930 public static final UnicodeBlock SAMARITAN =
1931 new UnicodeBlock("SAMARITAN");
1932
1933 /**
1934 * Constant for the "Mandaic" Unicode character block.
1935 * @since 1.7
1936 */
1937 public static final UnicodeBlock MANDAIC =
1938 new UnicodeBlock("MANDAIC");
1939
1940 /**
1941 * Constant for the "Ethiopic Supplement" Unicode character block.
1942 * @since 1.7
1943 */
1944 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1945 new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1946 "ETHIOPIC SUPPLEMENT",
1947 "ETHIOPICSUPPLEMENT");
1948
1949 /**
1950 * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1951 * Unicode character block.
1952 * @since 1.7
1953 */
1954 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1955 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1956 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1957 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1958
1959 /**
1960 * Constant for the "New Tai Lue" Unicode character block.
1961 * @since 1.7
1962 */
1963 public static final UnicodeBlock NEW_TAI_LUE =
1964 new UnicodeBlock("NEW_TAI_LUE",
1965 "NEW TAI LUE",
1966 "NEWTAILUE");
1967
1968 /**
1969 * Constant for the "Buginese" Unicode character block.
1970 * @since 1.7
1971 */
1972 public static final UnicodeBlock BUGINESE =
1973 new UnicodeBlock("BUGINESE");
1974
1975 /**
1976 * Constant for the "Tai Tham" Unicode character block.
1977 * @since 1.7
1978 */
1979 public static final UnicodeBlock TAI_THAM =
1980 new UnicodeBlock("TAI_THAM",
1981 "TAI THAM",
1982 "TAITHAM");
1983
1984 /**
1985 * Constant for the "Balinese" Unicode character block.
1986 * @since 1.7
1987 */
1988 public static final UnicodeBlock BALINESE =
1989 new UnicodeBlock("BALINESE");
1990
1991 /**
1992 * Constant for the "Sundanese" Unicode character block.
1993 * @since 1.7
1994 */
1995 public static final UnicodeBlock SUNDANESE =
1996 new UnicodeBlock("SUNDANESE");
1997
1998 /**
1999 * Constant for the "Batak" Unicode character block.
2000 * @since 1.7
2001 */
2002 public static final UnicodeBlock BATAK =
2003 new UnicodeBlock("BATAK");
2004
2005 /**
2006 * Constant for the "Lepcha" Unicode character block.
2007 * @since 1.7
2008 */
2009 public static final UnicodeBlock LEPCHA =
2010 new UnicodeBlock("LEPCHA");
2011
2012 /**
2013 * Constant for the "Ol Chiki" Unicode character block.
2014 * @since 1.7
2015 */
2016 public static final UnicodeBlock OL_CHIKI =
2017 new UnicodeBlock("OL_CHIKI",
2018 "OL CHIKI",
2019 "OLCHIKI");
2020
2021 /**
2022 * Constant for the "Vedic Extensions" Unicode character block.
2023 * @since 1.7
2024 */
2025 public static final UnicodeBlock VEDIC_EXTENSIONS =
2026 new UnicodeBlock("VEDIC_EXTENSIONS",
2027 "VEDIC EXTENSIONS",
2028 "VEDICEXTENSIONS");
2029
2030 /**
2031 * Constant for the "Phonetic Extensions Supplement" Unicode character
2032 * block.
2033 * @since 1.7
2034 */
2035 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
2036 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
2037 "PHONETIC EXTENSIONS SUPPLEMENT",
2038 "PHONETICEXTENSIONSSUPPLEMENT");
2039
/**
 * Constant for the "Combining Diacritical Marks Supplement" Unicode
 * character block.
 * @since 1.7
 */
public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
                     "COMBINING DIACRITICAL MARKS SUPPLEMENT",
                     "COMBININGDIACRITICALMARKSSUPPLEMENT");
2049
/**
 * Constant for the "Glagolitic" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock GLAGOLITIC =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("GLAGOLITIC");
2056
/**
 * Constant for the "Latin Extended-C" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock LATIN_EXTENDED_C =
    // Arguments: identifier, upper-case block name, then the same name with
    // spaces removed and the hyphen kept.
    new UnicodeBlock("LATIN_EXTENDED_C",
                     "LATIN EXTENDED-C",
                     "LATINEXTENDED-C");
2065
/**
 * Constant for the "Coptic" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock COPTIC =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("COPTIC");
2072
/**
 * Constant for the "Georgian Supplement" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("GEORGIAN_SUPPLEMENT",
                     "GEORGIAN SUPPLEMENT",
                     "GEORGIANSUPPLEMENT");
2081
/**
 * Constant for the "Tifinagh" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock TIFINAGH =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("TIFINAGH");
2088
/**
 * Constant for the "Ethiopic Extended" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock ETHIOPIC_EXTENDED =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("ETHIOPIC_EXTENDED",
                     "ETHIOPIC EXTENDED",
                     "ETHIOPICEXTENDED");
2097
/**
 * Constant for the "Cyrillic Extended-A" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock CYRILLIC_EXTENDED_A =
    // Arguments: identifier, upper-case block name, then the same name with
    // spaces removed and the hyphen kept.
    new UnicodeBlock("CYRILLIC_EXTENDED_A",
                     "CYRILLIC EXTENDED-A",
                     "CYRILLICEXTENDED-A");
2106
/**
 * Constant for the "Supplemental Punctuation" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
                     "SUPPLEMENTAL PUNCTUATION",
                     "SUPPLEMENTALPUNCTUATION");
2115
/**
 * Constant for the "CJK Strokes" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock CJK_STROKES =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("CJK_STROKES",
                     "CJK STROKES",
                     "CJKSTROKES");
2124
/**
 * Constant for the "Lisu" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock LISU =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("LISU");
2131
/**
 * Constant for the "Vai" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock VAI =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("VAI");
2138
/**
 * Constant for the "Cyrillic Extended-B" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock CYRILLIC_EXTENDED_B =
    // Arguments: identifier, upper-case block name, then the same name with
    // spaces removed and the hyphen kept.
    new UnicodeBlock("CYRILLIC_EXTENDED_B",
                     "CYRILLIC EXTENDED-B",
                     "CYRILLICEXTENDED-B");
2147
/**
 * Constant for the "Bamum" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock BAMUM =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("BAMUM");
2154
/**
 * Constant for the "Modifier Tone Letters" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock MODIFIER_TONE_LETTERS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("MODIFIER_TONE_LETTERS",
                     "MODIFIER TONE LETTERS",
                     "MODIFIERTONELETTERS");
2163
/**
 * Constant for the "Latin Extended-D" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock LATIN_EXTENDED_D =
    // Arguments: identifier, upper-case block name, then the same name with
    // spaces removed and the hyphen kept.
    new UnicodeBlock("LATIN_EXTENDED_D",
                     "LATIN EXTENDED-D",
                     "LATINEXTENDED-D");
2172
/**
 * Constant for the "Syloti Nagri" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock SYLOTI_NAGRI =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("SYLOTI_NAGRI",
                     "SYLOTI NAGRI",
                     "SYLOTINAGRI");
2181
/**
 * Constant for the "Common Indic Number Forms" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
                     "COMMON INDIC NUMBER FORMS",
                     "COMMONINDICNUMBERFORMS");
2190
/**
 * Constant for the "Phags-pa" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock PHAGS_PA =
    // Arguments: identifier, then the hyphenated upper-case block name. The
    // name has no space, and no third (space-free) spelling is passed.
    new UnicodeBlock("PHAGS_PA",
                     "PHAGS-PA");
2198
/**
 * Constant for the "Saurashtra" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock SAURASHTRA =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("SAURASHTRA");
2205
/**
 * Constant for the "Devanagari Extended" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock DEVANAGARI_EXTENDED =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("DEVANAGARI_EXTENDED",
                     "DEVANAGARI EXTENDED",
                     "DEVANAGARIEXTENDED");
2214
/**
 * Constant for the "Kayah Li" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock KAYAH_LI =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("KAYAH_LI",
                     "KAYAH LI",
                     "KAYAHLI");
2223
/**
 * Constant for the "Rejang" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock REJANG =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("REJANG");
2230
/**
 * Constant for the "Hangul Jamo Extended-A" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
    // Arguments: identifier, upper-case block name, then the same name with
    // spaces removed and the hyphen kept.
    new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
                     "HANGUL JAMO EXTENDED-A",
                     "HANGULJAMOEXTENDED-A");
2239
/**
 * Constant for the "Javanese" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock JAVANESE =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("JAVANESE");
2246
/**
 * Constant for the "Cham" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock CHAM =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("CHAM");
2253
/**
 * Constant for the "Myanmar Extended-A" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock MYANMAR_EXTENDED_A =
    // Arguments: identifier, upper-case block name, then the same name with
    // spaces removed and the hyphen kept.
    new UnicodeBlock("MYANMAR_EXTENDED_A",
                     "MYANMAR EXTENDED-A",
                     "MYANMAREXTENDED-A");
2262
/**
 * Constant for the "Tai Viet" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock TAI_VIET =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("TAI_VIET",
                     "TAI VIET",
                     "TAIVIET");
2271
/**
 * Constant for the "Ethiopic Extended-A" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
    // Arguments: identifier, upper-case block name, then the same name with
    // spaces removed and the hyphen kept.
    new UnicodeBlock("ETHIOPIC_EXTENDED_A",
                     "ETHIOPIC EXTENDED-A",
                     "ETHIOPICEXTENDED-A");
2280
/**
 * Constant for the "Meetei Mayek" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock MEETEI_MAYEK =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("MEETEI_MAYEK",
                     "MEETEI MAYEK",
                     "MEETEIMAYEK");
2289
/**
 * Constant for the "Hangul Jamo Extended-B" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
    // Arguments: identifier, upper-case block name, then the same name with
    // spaces removed and the hyphen kept.
    new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
                     "HANGUL JAMO EXTENDED-B",
                     "HANGULJAMOEXTENDED-B");
2298
/**
 * Constant for the "Vertical Forms" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock VERTICAL_FORMS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("VERTICAL_FORMS",
                     "VERTICAL FORMS",
                     "VERTICALFORMS");
2307
/**
 * Constant for the "Ancient Greek Numbers" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
                     "ANCIENT GREEK NUMBERS",
                     "ANCIENTGREEKNUMBERS");
2316
/**
 * Constant for the "Ancient Symbols" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock ANCIENT_SYMBOLS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("ANCIENT_SYMBOLS",
                     "ANCIENT SYMBOLS",
                     "ANCIENTSYMBOLS");
2325
/**
 * Constant for the "Phaistos Disc" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock PHAISTOS_DISC =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("PHAISTOS_DISC",
                     "PHAISTOS DISC",
                     "PHAISTOSDISC");
2334
/**
 * Constant for the "Lycian" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock LYCIAN =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("LYCIAN");
2341
/**
 * Constant for the "Carian" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock CARIAN =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("CARIAN");
2348
/**
 * Constant for the "Old Persian" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock OLD_PERSIAN =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("OLD_PERSIAN",
                     "OLD PERSIAN",
                     "OLDPERSIAN");
2357
/**
 * Constant for the "Imperial Aramaic" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock IMPERIAL_ARAMAIC =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("IMPERIAL_ARAMAIC",
                     "IMPERIAL ARAMAIC",
                     "IMPERIALARAMAIC");
2366
/**
 * Constant for the "Phoenician" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock PHOENICIAN =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("PHOENICIAN");
2373
/**
 * Constant for the "Lydian" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock LYDIAN =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("LYDIAN");
2380
/**
 * Constant for the "Kharoshthi" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock KHAROSHTHI =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("KHAROSHTHI");
2387
/**
 * Constant for the "Old South Arabian" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock OLD_SOUTH_ARABIAN =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("OLD_SOUTH_ARABIAN",
                     "OLD SOUTH ARABIAN",
                     "OLDSOUTHARABIAN");
2396
/**
 * Constant for the "Avestan" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock AVESTAN =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("AVESTAN");
2403
/**
 * Constant for the "Inscriptional Parthian" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
                     "INSCRIPTIONAL PARTHIAN",
                     "INSCRIPTIONALPARTHIAN");
2412
/**
 * Constant for the "Inscriptional Pahlavi" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
                     "INSCRIPTIONAL PAHLAVI",
                     "INSCRIPTIONALPAHLAVI");
2421
/**
 * Constant for the "Old Turkic" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock OLD_TURKIC =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("OLD_TURKIC",
                     "OLD TURKIC",
                     "OLDTURKIC");
2430
/**
 * Constant for the "Rumi Numeral Symbols" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
                     "RUMI NUMERAL SYMBOLS",
                     "RUMINUMERALSYMBOLS");
2439
/**
 * Constant for the "Brahmi" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock BRAHMI =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("BRAHMI");
2446
/**
 * Constant for the "Kaithi" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock KAITHI =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("KAITHI");
2453
/**
 * Constant for the "Cuneiform" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock CUNEIFORM =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("CUNEIFORM");
2460
/**
 * Constant for the "Cuneiform Numbers and Punctuation" Unicode
 * character block.
 * @since 1.7
 */
public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
                     "CUNEIFORM NUMBERS AND PUNCTUATION",
                     "CUNEIFORMNUMBERSANDPUNCTUATION");
2470
/**
 * Constant for the "Egyptian Hieroglyphs" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
                     "EGYPTIAN HIEROGLYPHS",
                     "EGYPTIANHIEROGLYPHS");
2479
/**
 * Constant for the "Bamum Supplement" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock BAMUM_SUPPLEMENT =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("BAMUM_SUPPLEMENT",
                     "BAMUM SUPPLEMENT",
                     "BAMUMSUPPLEMENT");
2488
/**
 * Constant for the "Kana Supplement" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock KANA_SUPPLEMENT =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("KANA_SUPPLEMENT",
                     "KANA SUPPLEMENT",
                     "KANASUPPLEMENT");
2497
/**
 * Constant for the "Ancient Greek Musical Notation" Unicode character
 * block.
 * @since 1.7
 */
public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
                     "ANCIENT GREEK MUSICAL NOTATION",
                     "ANCIENTGREEKMUSICALNOTATION");
2507
/**
 * Constant for the "Counting Rod Numerals" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock COUNTING_ROD_NUMERALS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("COUNTING_ROD_NUMERALS",
                     "COUNTING ROD NUMERALS",
                     "COUNTINGRODNUMERALS");
2516
/**
 * Constant for the "Mahjong Tiles" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock MAHJONG_TILES =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("MAHJONG_TILES",
                     "MAHJONG TILES",
                     "MAHJONGTILES");
2525
/**
 * Constant for the "Domino Tiles" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock DOMINO_TILES =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("DOMINO_TILES",
                     "DOMINO TILES",
                     "DOMINOTILES");
2534
/**
 * Constant for the "Playing Cards" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock PLAYING_CARDS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("PLAYING_CARDS",
                     "PLAYING CARDS",
                     "PLAYINGCARDS");
2543
/**
 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
 * block.
 * @since 1.7
 */
public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
                     "ENCLOSED ALPHANUMERIC SUPPLEMENT",
                     "ENCLOSEDALPHANUMERICSUPPLEMENT");
2553
/**
 * Constant for the "Enclosed Ideographic Supplement" Unicode character
 * block.
 * @since 1.7
 */
public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
                     "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
                     "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2563
/**
 * Constant for the "Miscellaneous Symbols and Pictographs" Unicode
 * character block.
 * @since 1.7
 */
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
                     "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
                     "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2573
/**
 * Constant for the "Emoticons" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock EMOTICONS =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("EMOTICONS");
2580
/**
 * Constant for the "Transport and Map Symbols" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
                     "TRANSPORT AND MAP SYMBOLS",
                     "TRANSPORTANDMAPSYMBOLS");
2589
/**
 * Constant for the "Alchemical Symbols" Unicode character block.
 * @since 1.7
 */
public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("ALCHEMICAL_SYMBOLS",
                     "ALCHEMICAL SYMBOLS",
                     "ALCHEMICALSYMBOLS");
2598
/**
 * Constant for the "CJK Unified Ideographs Extension C" Unicode
 * character block.
 * @since 1.7
 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
                     "CJK UNIFIED IDEOGRAPHS EXTENSION C",
                     "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2608
/**
 * Constant for the "CJK Unified Ideographs Extension D" Unicode
 * character block.
 * @since 1.7
 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
                     "CJK UNIFIED IDEOGRAPHS EXTENSION D",
                     "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2618
/**
 * Constant for the "Arabic Extended-A" Unicode character block.
 * @since 1.8
 */
public static final UnicodeBlock ARABIC_EXTENDED_A =
    // Arguments: identifier, upper-case block name, then the same name with
    // spaces removed and the hyphen kept.
    new UnicodeBlock("ARABIC_EXTENDED_A",
                     "ARABIC EXTENDED-A",
                     "ARABICEXTENDED-A");
2627
/**
 * Constant for the "Sundanese Supplement" Unicode character block.
 * @since 1.8
 */
public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("SUNDANESE_SUPPLEMENT",
                     "SUNDANESE SUPPLEMENT",
                     "SUNDANESESUPPLEMENT");
2636
/**
 * Constant for the "Meetei Mayek Extensions" Unicode character block.
 * @since 1.8
 */
public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
                     "MEETEI MAYEK EXTENSIONS",
                     "MEETEIMAYEKEXTENSIONS");
2645
/**
 * Constant for the "Meroitic Hieroglyphs" Unicode character block.
 * @since 1.8
 */
public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("MEROITIC_HIEROGLYPHS",
                     "MEROITIC HIEROGLYPHS",
                     "MEROITICHIEROGLYPHS");
2654
/**
 * Constant for the "Meroitic Cursive" Unicode character block.
 * @since 1.8
 */
public static final UnicodeBlock MEROITIC_CURSIVE =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("MEROITIC_CURSIVE",
                     "MEROITIC CURSIVE",
                     "MEROITICCURSIVE");
2663
/**
 * Constant for the "Sora Sompeng" Unicode character block.
 * @since 1.8
 */
public static final UnicodeBlock SORA_SOMPENG =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("SORA_SOMPENG",
                     "SORA SOMPENG",
                     "SORASOMPENG");
2672
/**
 * Constant for the "Chakma" Unicode character block.
 * @since 1.8
 */
public static final UnicodeBlock CHAKMA =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("CHAKMA");
2679
/**
 * Constant for the "Sharada" Unicode character block.
 * @since 1.8
 */
public static final UnicodeBlock SHARADA =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("SHARADA");
2686
/**
 * Constant for the "Takri" Unicode character block.
 * @since 1.8
 */
public static final UnicodeBlock TAKRI =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("TAKRI");
2693
/**
 * Constant for the "Miao" Unicode character block.
 * @since 1.8
 */
public static final UnicodeBlock MIAO =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("MIAO");
2700
/**
 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
 * character block.
 * @since 1.8
 */
public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
                     "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
                     "ARABICMATHEMATICALALPHABETICSYMBOLS");
2710
/**
 * Constant for the "Combining Diacritical Marks Extended" Unicode
 * character block.
 * @since 9
 */
public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED",
                     "COMBINING DIACRITICAL MARKS EXTENDED",
                     "COMBININGDIACRITICALMARKSEXTENDED");
2720
/**
 * Constant for the "Myanmar Extended-B" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock MYANMAR_EXTENDED_B =
    // Arguments: identifier, upper-case block name, then the same name with
    // spaces removed and the hyphen kept.
    new UnicodeBlock("MYANMAR_EXTENDED_B",
                     "MYANMAR EXTENDED-B",
                     "MYANMAREXTENDED-B");
2729
/**
 * Constant for the "Latin Extended-E" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock LATIN_EXTENDED_E =
    // Arguments: identifier, upper-case block name, then the same name with
    // spaces removed and the hyphen kept.
    new UnicodeBlock("LATIN_EXTENDED_E",
                     "LATIN EXTENDED-E",
                     "LATINEXTENDED-E");
2738
/**
 * Constant for the "Coptic Epact Numbers" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("COPTIC_EPACT_NUMBERS",
                     "COPTIC EPACT NUMBERS",
                     "COPTICEPACTNUMBERS");
2747
/**
 * Constant for the "Old Permic" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock OLD_PERMIC =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("OLD_PERMIC",
                     "OLD PERMIC",
                     "OLDPERMIC");
2756
/**
 * Constant for the "Elbasan" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock ELBASAN =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("ELBASAN");
2763
/**
 * Constant for the "Caucasian Albanian" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock CAUCASIAN_ALBANIAN =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("CAUCASIAN_ALBANIAN",
                     "CAUCASIAN ALBANIAN",
                     "CAUCASIANALBANIAN");
2772
/**
 * Constant for the "Linear A" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock LINEAR_A =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("LINEAR_A",
                     "LINEAR A",
                     "LINEARA");
2781
/**
 * Constant for the "Palmyrene" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock PALMYRENE =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("PALMYRENE");
2788
/**
 * Constant for the "Nabataean" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock NABATAEAN =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("NABATAEAN");
2795
/**
 * Constant for the "Old North Arabian" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock OLD_NORTH_ARABIAN =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("OLD_NORTH_ARABIAN",
                     "OLD NORTH ARABIAN",
                     "OLDNORTHARABIAN");
2804
/**
 * Constant for the "Manichaean" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock MANICHAEAN =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("MANICHAEAN");
2811
/**
 * Constant for the "Psalter Pahlavi" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock PSALTER_PAHLAVI =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("PSALTER_PAHLAVI",
                     "PSALTER PAHLAVI",
                     "PSALTERPAHLAVI");
2820
/**
 * Constant for the "Mahajani" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock MAHAJANI =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("MAHAJANI");
2827
/**
 * Constant for the "Sinhala Archaic Numbers" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS",
                     "SINHALA ARCHAIC NUMBERS",
                     "SINHALAARCHAICNUMBERS");
2836
/**
 * Constant for the "Khojki" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock KHOJKI =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("KHOJKI");
2843
/**
 * Constant for the "Khudawadi" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock KHUDAWADI =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("KHUDAWADI");
2850
/**
 * Constant for the "Grantha" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock GRANTHA =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("GRANTHA");
2857
/**
 * Constant for the "Tirhuta" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock TIRHUTA =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("TIRHUTA");
2864
/**
 * Constant for the "Siddham" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock SIDDHAM =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("SIDDHAM");
2871
/**
 * Constant for the "Modi" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock MODI =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("MODI");
2878
/**
 * Constant for the "Warang Citi" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock WARANG_CITI =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("WARANG_CITI",
                     "WARANG CITI",
                     "WARANGCITI");
2887
/**
 * Constant for the "Pau Cin Hau" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock PAU_CIN_HAU =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("PAU_CIN_HAU",
                     "PAU CIN HAU",
                     "PAUCINHAU");
2896
/**
 * Constant for the "Mro" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock MRO =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("MRO");
2903
/**
 * Constant for the "Bassa Vah" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock BASSA_VAH =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("BASSA_VAH",
                     "BASSA VAH",
                     "BASSAVAH");
2912
/**
 * Constant for the "Pahawh Hmong" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock PAHAWH_HMONG =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("PAHAWH_HMONG",
                     "PAHAWH HMONG",
                     "PAHAWHHMONG");
2921
/**
 * Constant for the "Duployan" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock DUPLOYAN =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("DUPLOYAN");
2928
/**
 * Constant for the "Shorthand Format Controls" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS",
                     "SHORTHAND FORMAT CONTROLS",
                     "SHORTHANDFORMATCONTROLS");
2937
/**
 * Constant for the "Mende Kikakui" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock MENDE_KIKAKUI =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("MENDE_KIKAKUI",
                     "MENDE KIKAKUI",
                     "MENDEKIKAKUI");
2946
/**
 * Constant for the "Ornamental Dingbats" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock ORNAMENTAL_DINGBATS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("ORNAMENTAL_DINGBATS",
                     "ORNAMENTAL DINGBATS",
                     "ORNAMENTALDINGBATS");
2955
/**
 * Constant for the "Geometric Shapes Extended" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED",
                     "GEOMETRIC SHAPES EXTENDED",
                     "GEOMETRICSHAPESEXTENDED");
2964
/**
 * Constant for the "Supplemental Arrows-C" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
    // Arguments: identifier, upper-case block name, then the same name with
    // spaces removed and the hyphen kept.
    new UnicodeBlock("SUPPLEMENTAL_ARROWS_C",
                     "SUPPLEMENTAL ARROWS-C",
                     "SUPPLEMENTALARROWS-C");
2973
/**
 * Constant for the "Cherokee Supplement" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("CHEROKEE_SUPPLEMENT",
                     "CHEROKEE SUPPLEMENT",
                     "CHEROKEESUPPLEMENT");
2982
/**
 * Constant for the "Hatran" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock HATRAN =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("HATRAN");
2989
/**
 * Constant for the "Old Hungarian" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock OLD_HUNGARIAN =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("OLD_HUNGARIAN",
                     "OLD HUNGARIAN",
                     "OLDHUNGARIAN");
2998
/**
 * Constant for the "Multani" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock MULTANI =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("MULTANI");
3005
/**
 * Constant for the "Ahom" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock AHOM =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("AHOM");
3012
/**
 * Constant for the "Early Dynastic Cuneiform" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM",
                     "EARLY DYNASTIC CUNEIFORM",
                     "EARLYDYNASTICCUNEIFORM");
3021
/**
 * Constant for the "Anatolian Hieroglyphs" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("ANATOLIAN_HIEROGLYPHS",
                     "ANATOLIAN HIEROGLYPHS",
                     "ANATOLIANHIEROGLYPHS");
3030
/**
 * Constant for the "Sutton SignWriting" Unicode character block.
 * @since 9
 */
public static final UnicodeBlock SUTTON_SIGNWRITING =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("SUTTON_SIGNWRITING",
                     "SUTTON SIGNWRITING",
                     "SUTTONSIGNWRITING");
3039
/**
 * Constant for the "Supplemental Symbols and Pictographs" Unicode
 * character block.
 * @since 9
 */
public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
                     "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS",
                     "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS");
3049
/**
 * Constant for the "CJK Unified Ideographs Extension E" Unicode
 * character block.
 * @since 9
 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
                     "CJK UNIFIED IDEOGRAPHS EXTENSION E",
                     "CJKUNIFIEDIDEOGRAPHSEXTENSIONE");
3059
/**
 * Constant for the "Syriac Supplement" Unicode character block.
 * @since 11
 */
public static final UnicodeBlock SYRIAC_SUPPLEMENT =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("SYRIAC_SUPPLEMENT",
                     "SYRIAC SUPPLEMENT",
                     "SYRIACSUPPLEMENT");
3069
/**
 * Constant for the "Cyrillic Extended-C" Unicode character block.
 * @since 11
 */
public static final UnicodeBlock CYRILLIC_EXTENDED_C =
    // Arguments: identifier, upper-case block name, then the same name with
    // spaces removed and the hyphen kept.
    new UnicodeBlock("CYRILLIC_EXTENDED_C",
                     "CYRILLIC EXTENDED-C",
                     "CYRILLICEXTENDED-C");
3079
/**
 * Constant for the "Osage" Unicode character block.
 * @since 11
 */
public static final UnicodeBlock OSAGE =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("OSAGE");
3087
/**
 * Constant for the "Newa" Unicode character block.
 * @since 11
 */
public static final UnicodeBlock NEWA =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("NEWA");
3095
/**
 * Constant for the "Mongolian Supplement" Unicode character block.
 * @since 11
 */
public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("MONGOLIAN_SUPPLEMENT",
                     "MONGOLIAN SUPPLEMENT",
                     "MONGOLIANSUPPLEMENT");
3105
/**
 * Constant for the "Marchen" Unicode character block.
 * @since 11
 */
public static final UnicodeBlock MARCHEN =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("MARCHEN");
3113
/**
 * Constant for the "Ideographic Symbols and Punctuation" Unicode
 * character block.
 * @since 11
 */
public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION",
                     "IDEOGRAPHIC SYMBOLS AND PUNCTUATION",
                     "IDEOGRAPHICSYMBOLSANDPUNCTUATION");
3123
/**
 * Constant for the "Tangut" Unicode character block.
 * @since 11
 */
public static final UnicodeBlock TANGUT =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("TANGUT");
3131
/**
 * Constant for the "Tangut Components" Unicode character block.
 * @since 11
 */
public static final UnicodeBlock TANGUT_COMPONENTS =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("TANGUT_COMPONENTS",
                     "TANGUT COMPONENTS",
                     "TANGUTCOMPONENTS");
3141
/**
 * Constant for the "Kana Extended-A" Unicode character block.
 * @since 11
 */
public static final UnicodeBlock KANA_EXTENDED_A =
    // Arguments: identifier, upper-case block name, then the same name with
    // spaces removed and the hyphen kept.
    new UnicodeBlock("KANA_EXTENDED_A",
                     "KANA EXTENDED-A",
                     "KANAEXTENDED-A");

/**
 * Constant for the "Glagolitic Supplement" Unicode character block.
 * @since 11
 */
public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("GLAGOLITIC_SUPPLEMENT",
                     "GLAGOLITIC SUPPLEMENT",
                     "GLAGOLITICSUPPLEMENT");

/**
 * Constant for the "Adlam" Unicode character block.
 * @since 11
 */
public static final UnicodeBlock ADLAM =
    // One-word block name: the identifier is the only argument.
    new UnicodeBlock("ADLAM");
3167
/**
 * Constant for the "Masaram Gondi" Unicode character block.
 * @since 11
 */
public static final UnicodeBlock MASARAM_GONDI =
    // Arguments: identifier, upper-case block name, block name without spaces.
    new UnicodeBlock("MASARAM_GONDI",
                     "MASARAM GONDI",
                     "MASARAMGONDI");
3177
3178 /**
3179 * Constant for the "Zanabazar Square" Unicode
3180 * character block.
3181 * @since 11
3182 */
3183 public static final UnicodeBlock ZANABAZAR_SQUARE =
3184 new UnicodeBlock("ZANABAZAR_SQUARE",
3185 "ZANABAZAR SQUARE",
3186 "ZANABAZARSQUARE");
3187
3188 /**
3189 * Constant for the "Nushu" Unicode
3190 * character block.
3191 * @since 11
3192 */
3193 public static final UnicodeBlock NUSHU =
3194 new UnicodeBlock("NUSHU");
3195
3196 /**
3197 * Constant for the "Soyombo" Unicode
3198 * character block.
3199 * @since 11
3200 */
3201 public static final UnicodeBlock SOYOMBO =
3202 new UnicodeBlock("SOYOMBO");
3203
3204 /**
3205 * Constant for the "Bhaiksuki" Unicode
3206 * character block.
3207 * @since 11
3208 */
3209 public static final UnicodeBlock BHAIKSUKI =
3210 new UnicodeBlock("BHAIKSUKI");
3211
3212 /**
3213 * Constant for the "CJK Unified Ideographs Extension F" Unicode
3214 * character block.
3215 * @since 11
3216 */
3217 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
3218 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F",
3219 "CJK UNIFIED IDEOGRAPHS EXTENSION F",
3220 "CJKUNIFIEDIDEOGRAPHSEXTENSIONF");
3221 /**
3222 * Constant for the "Georgian Extended" Unicode
3223 * character block.
3224 * @since 12
3225 */
3226 public static final UnicodeBlock GEORGIAN_EXTENDED =
3227 new UnicodeBlock("GEORGIAN_EXTENDED",
3228 "GEORGIAN EXTENDED",
3229 "GEORGIANEXTENDED");
3230
3231 /**
3232 * Constant for the "Hanifi Rohingya" Unicode
3233 * character block.
3234 * @since 12
3235 */
3236 public static final UnicodeBlock HANIFI_ROHINGYA =
3237 new UnicodeBlock("HANIFI_ROHINGYA",
3238 "HANIFI ROHINGYA",
3239 "HANIFIROHINGYA");
3240
3241 /**
3242 * Constant for the "Old Sogdian" Unicode
3243 * character block.
3244 * @since 12
3245 */
3246 public static final UnicodeBlock OLD_SOGDIAN =
3247 new UnicodeBlock("OLD_SOGDIAN",
3248 "OLD SOGDIAN",
3249 "OLDSOGDIAN");
3250
3251 /**
3252 * Constant for the "Sogdian" Unicode
3253 * character block.
3254 * @since 12
3255 */
3256 public static final UnicodeBlock SOGDIAN =
3257 new UnicodeBlock("SOGDIAN");
3258
3259 /**
3260 * Constant for the "Dogra" Unicode
3261 * character block.
3262 * @since 12
3263 */
3264 public static final UnicodeBlock DOGRA =
3265 new UnicodeBlock("DOGRA");
3266
3267 /**
3268 * Constant for the "Gunjala Gondi" Unicode
3269 * character block.
3270 * @since 12
3271 */
3272 public static final UnicodeBlock GUNJALA_GONDI =
3273 new UnicodeBlock("GUNJALA_GONDI",
3274 "GUNJALA GONDI",
3275 "GUNJALAGONDI");
3276
3277 /**
3278 * Constant for the "Makasar" Unicode
3279 * character block.
3280 * @since 12
3281 */
3282 public static final UnicodeBlock MAKASAR =
3283 new UnicodeBlock("MAKASAR");
3284
3285 /**
3286 * Constant for the "Medefaidrin" Unicode
3287 * character block.
3288 * @since 12
3289 */
3290 public static final UnicodeBlock MEDEFAIDRIN =
3291 new UnicodeBlock("MEDEFAIDRIN");
3292
3293 /**
3294 * Constant for the "Mayan Numerals" Unicode
3295 * character block.
3296 * @since 12
3297 */
3298 public static final UnicodeBlock MAYAN_NUMERALS =
3299 new UnicodeBlock("MAYAN_NUMERALS",
3300 "MAYAN NUMERALS",
3301 "MAYANNUMERALS");
3302
3303 /**
3304 * Constant for the "Indic Siyaq Numbers" Unicode
3305 * character block.
3306 * @since 12
3307 */
3308 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
3309 new UnicodeBlock("INDIC_SIYAQ_NUMBERS",
3310 "INDIC SIYAQ NUMBERS",
3311 "INDICSIYAQNUMBERS");
3312
3313 /**
3314 * Constant for the "Chess Symbols" Unicode
3315 * character block.
3316 * @since 12
3317 */
3318 public static final UnicodeBlock CHESS_SYMBOLS =
3319 new UnicodeBlock("CHESS_SYMBOLS",
3320 "CHESS SYMBOLS",
3321 "CHESSSYMBOLS");
3322
3323 /**
3324 * Constant for the "Elymaic" Unicode
3325 * character block.
3326 * @since 13
3327 */
3328 public static final UnicodeBlock ELYMAIC =
3329 new UnicodeBlock("ELYMAIC");
3330
3331 /**
3332 * Constant for the "Nandinagari" Unicode
3333 * character block.
3334 * @since 13
3335 */
3336 public static final UnicodeBlock NANDINAGARI =
3337 new UnicodeBlock("NANDINAGARI");
3338
3339 /**
3340 * Constant for the "Tamil Supplement" Unicode
3341 * character block.
3342 * @since 13
3343 */
3344 public static final UnicodeBlock TAMIL_SUPPLEMENT =
3345 new UnicodeBlock("TAMIL_SUPPLEMENT",
3346 "TAMIL SUPPLEMENT",
3347 "TAMILSUPPLEMENT");
3348
3349 /**
3350 * Constant for the "Egyptian Hieroglyph Format Controls" Unicode
3351 * character block.
3352 * @since 13
3353 */
3354 public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
3355 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS",
3356 "EGYPTIAN HIEROGLYPH FORMAT CONTROLS",
3357 "EGYPTIANHIEROGLYPHFORMATCONTROLS");
3358
3359 /**
3360 * Constant for the "Small Kana Extension" Unicode
3361 * character block.
3362 * @since 13
3363 */
3364 public static final UnicodeBlock SMALL_KANA_EXTENSION =
3365 new UnicodeBlock("SMALL_KANA_EXTENSION",
3366 "SMALL KANA EXTENSION",
3367 "SMALLKANAEXTENSION");
3368
3369 /**
3370 * Constant for the "Nyiakeng Puachue Hmong" Unicode
3371 * character block.
3372 * @since 13
3373 */
3374 public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
3375 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG",
3376 "NYIAKENG PUACHUE HMONG",
3377 "NYIAKENGPUACHUEHMONG");
3378
3379 /**
3380 * Constant for the "Wancho" Unicode
3381 * character block.
3382 * @since 13
3383 */
3384 public static final UnicodeBlock WANCHO =
3385 new UnicodeBlock("WANCHO");
3386
3387 /**
3388 * Constant for the "Ottoman Siyaq Numbers" Unicode
3389 * character block.
3390 * @since 13
3391 */
3392 public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
3393 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS",
3394 "OTTOMAN SIYAQ NUMBERS",
3395 "OTTOMANSIYAQNUMBERS");
3396
3397 /**
3398 * Constant for the "Symbols and Pictographs Extended-A" Unicode
3399 * character block.
3400 * @since 13
3401 */
3402 public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
3403 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A",
3404 "SYMBOLS AND PICTOGRAPHS EXTENDED-A",
3405 "SYMBOLSANDPICTOGRAPHSEXTENDED-A");
3406
3407 /**
3408 * Constant for the "Yezidi" Unicode
3409 * character block.
3410 * @since 15
3411 */
3412 public static final UnicodeBlock YEZIDI =
3413 new UnicodeBlock("YEZIDI");
3414
3415 /**
3416 * Constant for the "Chorasmian" Unicode
3417 * character block.
3418 * @since 15
3419 */
3420 public static final UnicodeBlock CHORASMIAN =
3421 new UnicodeBlock("CHORASMIAN");
3422
3423 /**
3424 * Constant for the "Dives Akuru" Unicode
3425 * character block.
3426 * @since 15
3427 */
3428 public static final UnicodeBlock DIVES_AKURU =
3429 new UnicodeBlock("DIVES_AKURU",
3430 "DIVES AKURU",
3431 "DIVESAKURU");
3432
3433 /**
3434 * Constant for the "Lisu Supplement" Unicode
3435 * character block.
3436 * @since 15
3437 */
3438 public static final UnicodeBlock LISU_SUPPLEMENT =
3439 new UnicodeBlock("LISU_SUPPLEMENT",
3440 "LISU SUPPLEMENT",
3441 "LISUSUPPLEMENT");
3442
3443 /**
3444 * Constant for the "Khitan Small Script" Unicode
3445 * character block.
3446 * @since 15
3447 */
3448 public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
3449 new UnicodeBlock("KHITAN_SMALL_SCRIPT",
3450 "KHITAN SMALL SCRIPT",
3451 "KHITANSMALLSCRIPT");
3452
3453 /**
3454 * Constant for the "Tangut Supplement" Unicode
3455 * character block.
3456 * @since 15
3457 */
3458 public static final UnicodeBlock TANGUT_SUPPLEMENT =
3459 new UnicodeBlock("TANGUT_SUPPLEMENT",
3460 "TANGUT SUPPLEMENT",
3461 "TANGUTSUPPLEMENT");
3462
3463 /**
3464 * Constant for the "Symbols for Legacy Computing" Unicode
3465 * character block.
3466 * @since 15
3467 */
3468 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
3469 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING",
3470 "SYMBOLS FOR LEGACY COMPUTING",
3471 "SYMBOLSFORLEGACYCOMPUTING");
3472
3473 /**
3474 * Constant for the "CJK Unified Ideographs Extension G" Unicode
3475 * character block.
3476 * @since 15
3477 */
3478 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
3479 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
3480 "CJK UNIFIED IDEOGRAPHS EXTENSION G",
3481 "CJKUNIFIEDIDEOGRAPHSEXTENSIONG");
3482
3483 /**
3484 * Constant for the "Arabic Extended-B" Unicode
3485 * character block.
3486 * @since 19
3487 */
3488 public static final UnicodeBlock ARABIC_EXTENDED_B =
3489 new UnicodeBlock("ARABIC_EXTENDED_B",
3490 "ARABIC EXTENDED-B",
3491 "ARABICEXTENDED-B");
3492
3493 /**
3494 * Constant for the "Vithkuqi" Unicode
3495 * character block.
3496 * @since 19
3497 */
3498 public static final UnicodeBlock VITHKUQI =
3499 new UnicodeBlock("VITHKUQI");
3500
3501 /**
3502 * Constant for the "Latin Extended-F" Unicode
3503 * character block.
3504 * @since 19
3505 */
3506 public static final UnicodeBlock LATIN_EXTENDED_F =
3507 new UnicodeBlock("LATIN_EXTENDED_F",
3508 "LATIN EXTENDED-F",
3509 "LATINEXTENDED-F");
3510
3511 /**
3512 * Constant for the "Old Uyghur" Unicode
3513 * character block.
3514 * @since 19
3515 */
3516 public static final UnicodeBlock OLD_UYGHUR =
3517 new UnicodeBlock("OLD_UYGHUR",
3518 "OLD UYGHUR",
3519 "OLDUYGHUR");
3520
3521 /**
3522 * Constant for the "Unified Canadian Aboriginal Syllabics Extended-A" Unicode
3523 * character block.
3524 * @since 19
3525 */
3526 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A =
3527 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A",
3528 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED-A",
3529 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED-A");
3530
3531 /**
3532 * Constant for the "Cypro-Minoan" Unicode
3533 * character block.
3534 * @since 19
3535 */
3536 public static final UnicodeBlock CYPRO_MINOAN =
3537 new UnicodeBlock("CYPRO_MINOAN",
3538 "CYPRO-MINOAN",
3539 "CYPRO-MINOAN");
3540
3541 /**
3542 * Constant for the "Tangsa" Unicode
3543 * character block.
3544 * @since 19
3545 */
3546 public static final UnicodeBlock TANGSA =
3547 new UnicodeBlock("TANGSA");
3548
3549 /**
3550 * Constant for the "Kana Extended-B" Unicode
3551 * character block.
3552 * @since 19
3553 */
3554 public static final UnicodeBlock KANA_EXTENDED_B =
3555 new UnicodeBlock("KANA_EXTENDED_B",
3556 "KANA EXTENDED-B",
3557 "KANAEXTENDED-B");
3558
3559 /**
3560 * Constant for the "Znamenny Musical Notation" Unicode
3561 * character block.
3562 * @since 19
3563 */
3564 public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION =
3565 new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION",
3566 "ZNAMENNY MUSICAL NOTATION",
3567 "ZNAMENNYMUSICALNOTATION");
3568
3569 /**
3570 * Constant for the "Latin Extended-G" Unicode
3571 * character block.
3572 * @since 19
3573 */
3574 public static final UnicodeBlock LATIN_EXTENDED_G =
3575 new UnicodeBlock("LATIN_EXTENDED_G",
3576 "LATIN EXTENDED-G",
3577 "LATINEXTENDED-G");
3578
3579 /**
3580 * Constant for the "Toto" Unicode
3581 * character block.
3582 * @since 19
3583 */
3584 public static final UnicodeBlock TOTO =
3585 new UnicodeBlock("TOTO");
3586
3587 /**
3588 * Constant for the "Ethiopic Extended-B" Unicode
3589 * character block.
3590 * @since 19
3591 */
3592 public static final UnicodeBlock ETHIOPIC_EXTENDED_B =
3593 new UnicodeBlock("ETHIOPIC_EXTENDED_B",
3594 "ETHIOPIC EXTENDED-B",
3595 "ETHIOPICEXTENDED-B");
3596
3597 /**
3598 * Constant for the "Arabic Extended-C" Unicode
3599 * character block.
3600 * @since 20
3601 */
3602 public static final UnicodeBlock ARABIC_EXTENDED_C =
3603 new UnicodeBlock("ARABIC_EXTENDED_C",
3604 "ARABIC EXTENDED-C",
3605 "ARABICEXTENDED-C");
3606
3607 /**
3608 * Constant for the "Devanagari Extended-A" Unicode
3609 * character block.
3610 * @since 20
3611 */
3612 public static final UnicodeBlock DEVANAGARI_EXTENDED_A =
3613 new UnicodeBlock("DEVANAGARI_EXTENDED_A",
3614 "DEVANAGARI EXTENDED-A",
3615 "DEVANAGARIEXTENDED-A");
3616
3617 /**
3618 * Constant for the "Kawi" Unicode
3619 * character block.
3620 * @since 20
3621 */
3622 public static final UnicodeBlock KAWI =
3623 new UnicodeBlock("KAWI");
3624
3625 /**
3626 * Constant for the "Kaktovik Numerals" Unicode
3627 * character block.
3628 * @since 20
3629 */
3630 public static final UnicodeBlock KAKTOVIK_NUMERALS =
3631 new UnicodeBlock("KAKTOVIK_NUMERALS",
3632 "KAKTOVIK NUMERALS",
3633 "KAKTOVIKNUMERALS");
3634
3635 /**
3636 * Constant for the "Cyrillic Extended-D" Unicode
3637 * character block.
3638 * @since 20
3639 */
3640 public static final UnicodeBlock CYRILLIC_EXTENDED_D =
3641 new UnicodeBlock("CYRILLIC_EXTENDED_D",
3642 "CYRILLIC EXTENDED-D",
3643 "CYRILLICEXTENDED-D");
3644
3645 /**
3646 * Constant for the "Nag Mundari" Unicode
3647 * character block.
3648 * @since 20
3649 */
3650 public static final UnicodeBlock NAG_MUNDARI =
3651 new UnicodeBlock("NAG_MUNDARI",
3652 "NAG MUNDARI",
3653 "NAGMUNDARI");
3654
3655 /**
3656 * Constant for the "CJK Unified Ideographs Extension H" Unicode
3657 * character block.
3658 * @since 20
3659 */
3660 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H =
3661 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H",
3662 "CJK UNIFIED IDEOGRAPHS EXTENSION H",
3663 "CJKUNIFIEDIDEOGRAPHSEXTENSIONH");
3664
3665 /**
3666 * Constant for the "CJK Unified Ideographs Extension I" Unicode
3667 * character block.
3668 * @since 22
3669 */
3670 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I =
3671 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I",
3672 "CJK UNIFIED IDEOGRAPHS EXTENSION I",
3673 "CJKUNIFIEDIDEOGRAPHSEXTENSIONI");
3674
3675 /**
3676 * Constant for the "Todhri" Unicode
3677 * character block.
3678 * @since 24
3679 */
3680 public static final UnicodeBlock TODHRI =
3681 new UnicodeBlock("TODHRI");
3682
3683 /**
3684 * Constant for the "Garay" Unicode
3685 * character block.
3686 * @since 24
3687 */
3688 public static final UnicodeBlock GARAY =
3689 new UnicodeBlock("GARAY");
3690
3691 /**
3692 * Constant for the "Tulu-Tigalari" Unicode
3693 * character block.
3694 * @since 24
3695 */
3696 public static final UnicodeBlock TULU_TIGALARI =
3697 new UnicodeBlock("TULU_TIGALARI",
3698 "TULU-TIGALARI");
3699
3700 /**
3701 * Constant for the "Myanmar Extended-C" Unicode
3702 * character block.
3703 * @since 24
3704 */
3705 public static final UnicodeBlock MYANMAR_EXTENDED_C =
3706 new UnicodeBlock("MYANMAR_EXTENDED_C",
3707 "MYANMAR EXTENDED-C",
3708 "MYANMAREXTENDED-C");
3709
3710 /**
3711 * Constant for the "Sunuwar" Unicode
3712 * character block.
3713 * @since 24
3714 */
3715 public static final UnicodeBlock SUNUWAR =
3716 new UnicodeBlock("SUNUWAR");
3717
3718 /**
3719 * Constant for the "Egyptian Hieroglyphs Extended-A" Unicode
3720 * character block.
3721 * @since 24
3722 */
3723 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS_EXTENDED_A =
3724 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS_EXTENDED_A",
3725 "EGYPTIAN HIEROGLYPHS EXTENDED-A",
3726 "EGYPTIANHIEROGLYPHSEXTENDED-A");
3727
3728 /**
3729 * Constant for the "Gurung Khema" Unicode
3730 * character block.
3731 * @since 24
3732 */
3733 public static final UnicodeBlock GURUNG_KHEMA =
3734 new UnicodeBlock("GURUNG_KHEMA",
3735 "GURUNG KHEMA",
3736 "GURUNGKHEMA");
3737
3738 /**
3739 * Constant for the "Kirat Rai" Unicode
3740 * character block.
3741 * @since 24
3742 */
3743 public static final UnicodeBlock KIRAT_RAI =
3744 new UnicodeBlock("KIRAT_RAI",
3745 "KIRAT RAI",
3746 "KIRATRAI");
3747
3748 /**
3749 * Constant for the "Symbols for Legacy Computing Supplement" Unicode
3750 * character block.
3751 * @since 24
3752 */
3753 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT =
3754 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT",
3755 "SYMBOLS FOR LEGACY COMPUTING SUPPLEMENT",
3756 "SYMBOLSFORLEGACYCOMPUTINGSUPPLEMENT");
3757
3758 /**
3759 * Constant for the "Ol Onal" Unicode
3760 * character block.
3761 * @since 24
3762 */
3763 public static final UnicodeBlock OL_ONAL =
3764 new UnicodeBlock("OL_ONAL",
3765 "OL ONAL",
3766 "OLONAL");
3767
3768 /**
3769 * Constant for the "Sidetic" Unicode
3770 * character block.
3771 * @since 26
3772 */
3773 public static final UnicodeBlock SIDETIC =
3774 new UnicodeBlock("SIDETIC");
3775
3776 /**
3777 * Constant for the "Sharada Supplement" Unicode
3778 * character block.
3779 * @since 26
3780 */
3781 public static final UnicodeBlock SHARADA_SUPPLEMENT =
3782 new UnicodeBlock("SHARADA_SUPPLEMENT",
3783 "SHARADA SUPPLEMENT",
3784 "SHARADASUPPLEMENT");
3785
3786 /**
3787 * Constant for the "Tolong Siki" Unicode
3788 * character block.
3789 * @since 26
3790 */
3791 public static final UnicodeBlock TOLONG_SIKI =
3792 new UnicodeBlock("TOLONG_SIKI",
3793 "TOLONG SIKI",
3794 "TOLONGSIKI");
3795
3796 /**
3797 * Constant for the "Beria Erfe" Unicode
3798 * character block.
3799 * @since 26
3800 */
3801 public static final UnicodeBlock BERIA_ERFE =
3802 new UnicodeBlock("BERIA_ERFE",
3803 "BERIA ERFE",
3804 "BERIAERFE");
3805
3806 /**
3807 * Constant for the "Tangut Components Supplement" Unicode
3808 * character block.
3809 * @since 26
3810 */
3811 public static final UnicodeBlock TANGUT_COMPONENTS_SUPPLEMENT =
3812 new UnicodeBlock("TANGUT_COMPONENTS_SUPPLEMENT",
3813 "TANGUT COMPONENTS SUPPLEMENT",
3814 "TANGUTCOMPONENTSSUPPLEMENT");
3815
3816 /**
3817 * Constant for the "Miscellaneous Symbols Supplement" Unicode
3818 * character block.
3819 * @since 26
3820 */
3821 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_SUPPLEMENT =
3822 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_SUPPLEMENT",
3823 "MISCELLANEOUS SYMBOLS SUPPLEMENT",
3824 "MISCELLANEOUSSYMBOLSSUPPLEMENT");
3825
3826 /**
3827 * Constant for the "Tai Yo" Unicode
3828 * character block.
3829 * @since 26
3830 */
3831 public static final UnicodeBlock TAI_YO =
3832 new UnicodeBlock("TAI_YO",
3833 "TAI YO",
3834 "TAIYO");
3835
3836 /**
3837 * Constant for the "CJK Unified Ideographs Extension J" Unicode
3838 * character block.
3839 * @since 26
3840 */
3841 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_J =
3842 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_J",
3843 "CJK UNIFIED IDEOGRAPHS EXTENSION J",
3844 "CJKUNIFIEDIDEOGRAPHSEXTENSIONJ");
3845
3846
3847 private static final int[] blockStarts = {
3848 0x0000, // 0000..007F; Basic Latin
3849 0x0080, // 0080..00FF; Latin-1 Supplement
3850 0x0100, // 0100..017F; Latin Extended-A
3851 0x0180, // 0180..024F; Latin Extended-B
3852 0x0250, // 0250..02AF; IPA Extensions
3853 0x02B0, // 02B0..02FF; Spacing Modifier Letters
3854 0x0300, // 0300..036F; Combining Diacritical Marks
3855 0x0370, // 0370..03FF; Greek and Coptic
3856 0x0400, // 0400..04FF; Cyrillic
3857 0x0500, // 0500..052F; Cyrillic Supplement
3858 0x0530, // 0530..058F; Armenian
3859 0x0590, // 0590..05FF; Hebrew
3860 0x0600, // 0600..06FF; Arabic
3861 0x0700, // 0700..074F; Syriac
3862 0x0750, // 0750..077F; Arabic Supplement
3863 0x0780, // 0780..07BF; Thaana
3864 0x07C0, // 07C0..07FF; NKo
3865 0x0800, // 0800..083F; Samaritan
3866 0x0840, // 0840..085F; Mandaic
3867 0x0860, // 0860..086F; Syriac Supplement
3868 0x0870, // 0870..089F; Arabic Extended-B
3869 0x08A0, // 08A0..08FF; Arabic Extended-A
3870 0x0900, // 0900..097F; Devanagari
3871 0x0980, // 0980..09FF; Bengali
3872 0x0A00, // 0A00..0A7F; Gurmukhi
3873 0x0A80, // 0A80..0AFF; Gujarati
3874 0x0B00, // 0B00..0B7F; Oriya
3875 0x0B80, // 0B80..0BFF; Tamil
3876 0x0C00, // 0C00..0C7F; Telugu
3877 0x0C80, // 0C80..0CFF; Kannada
3878 0x0D00, // 0D00..0D7F; Malayalam
3879 0x0D80, // 0D80..0DFF; Sinhala
3880 0x0E00, // 0E00..0E7F; Thai
3881 0x0E80, // 0E80..0EFF; Lao
3882 0x0F00, // 0F00..0FFF; Tibetan
3883 0x1000, // 1000..109F; Myanmar
3884 0x10A0, // 10A0..10FF; Georgian
3885 0x1100, // 1100..11FF; Hangul Jamo
3886 0x1200, // 1200..137F; Ethiopic
3887 0x1380, // 1380..139F; Ethiopic Supplement
3888 0x13A0, // 13A0..13FF; Cherokee
3889 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics
3890 0x1680, // 1680..169F; Ogham
3891 0x16A0, // 16A0..16FF; Runic
3892 0x1700, // 1700..171F; Tagalog
3893 0x1720, // 1720..173F; Hanunoo
3894 0x1740, // 1740..175F; Buhid
3895 0x1760, // 1760..177F; Tagbanwa
3896 0x1780, // 1780..17FF; Khmer
3897 0x1800, // 1800..18AF; Mongolian
3898 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
3899 0x1900, // 1900..194F; Limbu
3900 0x1950, // 1950..197F; Tai Le
3901 0x1980, // 1980..19DF; New Tai Lue
3902 0x19E0, // 19E0..19FF; Khmer Symbols
3903 0x1A00, // 1A00..1A1F; Buginese
3904 0x1A20, // 1A20..1AAF; Tai Tham
3905 0x1AB0, // 1AB0..1AFF; Combining Diacritical Marks Extended
3906 0x1B00, // 1B00..1B7F; Balinese
3907 0x1B80, // 1B80..1BBF; Sundanese
3908 0x1BC0, // 1BC0..1BFF; Batak
3909 0x1C00, // 1C00..1C4F; Lepcha
3910 0x1C50, // 1C50..1C7F; Ol Chiki
3911 0x1C80, // 1C80..1C8F; Cyrillic Extended-C
3912 0x1C90, // 1C90..1CBF; Georgian Extended
3913 0x1CC0, // 1CC0..1CCF; Sundanese Supplement
3914 0x1CD0, // 1CD0..1CFF; Vedic Extensions
3915 0x1D00, // 1D00..1D7F; Phonetic Extensions
3916 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement
3917 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement
3918 0x1E00, // 1E00..1EFF; Latin Extended Additional
3919 0x1F00, // 1F00..1FFF; Greek Extended
3920 0x2000, // 2000..206F; General Punctuation
3921 0x2070, // 2070..209F; Superscripts and Subscripts
3922 0x20A0, // 20A0..20CF; Currency Symbols
3923 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols
3924 0x2100, // 2100..214F; Letterlike Symbols
3925 0x2150, // 2150..218F; Number Forms
3926 0x2190, // 2190..21FF; Arrows
3927 0x2200, // 2200..22FF; Mathematical Operators
3928 0x2300, // 2300..23FF; Miscellaneous Technical
3929 0x2400, // 2400..243F; Control Pictures
3930 0x2440, // 2440..245F; Optical Character Recognition
3931 0x2460, // 2460..24FF; Enclosed Alphanumerics
3932 0x2500, // 2500..257F; Box Drawing
3933 0x2580, // 2580..259F; Block Elements
3934 0x25A0, // 25A0..25FF; Geometric Shapes
3935 0x2600, // 2600..26FF; Miscellaneous Symbols
3936 0x2700, // 2700..27BF; Dingbats
3937 0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A
3938 0x27F0, // 27F0..27FF; Supplemental Arrows-A
3939 0x2800, // 2800..28FF; Braille Patterns
3940 0x2900, // 2900..297F; Supplemental Arrows-B
3941 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B
3942 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators
3943 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows
3944 0x2C00, // 2C00..2C5F; Glagolitic
3945 0x2C60, // 2C60..2C7F; Latin Extended-C
3946 0x2C80, // 2C80..2CFF; Coptic
3947 0x2D00, // 2D00..2D2F; Georgian Supplement
3948 0x2D30, // 2D30..2D7F; Tifinagh
3949 0x2D80, // 2D80..2DDF; Ethiopic Extended
3950 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A
3951 0x2E00, // 2E00..2E7F; Supplemental Punctuation
3952 0x2E80, // 2E80..2EFF; CJK Radicals Supplement
3953 0x2F00, // 2F00..2FDF; Kangxi Radicals
3954 0x2FE0, // unassigned
3955 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters
3956 0x3000, // 3000..303F; CJK Symbols and Punctuation
3957 0x3040, // 3040..309F; Hiragana
3958 0x30A0, // 30A0..30FF; Katakana
3959 0x3100, // 3100..312F; Bopomofo
3960 0x3130, // 3130..318F; Hangul Compatibility Jamo
3961 0x3190, // 3190..319F; Kanbun
3962 0x31A0, // 31A0..31BF; Bopomofo Extended
3963 0x31C0, // 31C0..31EF; CJK Strokes
3964 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions
3965 0x3200, // 3200..32FF; Enclosed CJK Letters and Months
3966 0x3300, // 3300..33FF; CJK Compatibility
3967 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A
3968 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols
3969 0x4E00, // 4E00..9FFF; CJK Unified Ideographs
3970 0xA000, // A000..A48F; Yi Syllables
3971 0xA490, // A490..A4CF; Yi Radicals
3972 0xA4D0, // A4D0..A4FF; Lisu
3973 0xA500, // A500..A63F; Vai
3974 0xA640, // A640..A69F; Cyrillic Extended-B
3975 0xA6A0, // A6A0..A6FF; Bamum
3976 0xA700, // A700..A71F; Modifier Tone Letters
3977 0xA720, // A720..A7FF; Latin Extended-D
3978 0xA800, // A800..A82F; Syloti Nagri
3979 0xA830, // A830..A83F; Common Indic Number Forms
3980 0xA840, // A840..A87F; Phags-pa
3981 0xA880, // A880..A8DF; Saurashtra
3982 0xA8E0, // A8E0..A8FF; Devanagari Extended
3983 0xA900, // A900..A92F; Kayah Li
3984 0xA930, // A930..A95F; Rejang
3985 0xA960, // A960..A97F; Hangul Jamo Extended-A
3986 0xA980, // A980..A9DF; Javanese
3987 0xA9E0, // A9E0..A9FF; Myanmar Extended-B
3988 0xAA00, // AA00..AA5F; Cham
3989 0xAA60, // AA60..AA7F; Myanmar Extended-A
3990 0xAA80, // AA80..AADF; Tai Viet
3991 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions
3992 0xAB00, // AB00..AB2F; Ethiopic Extended-A
3993 0xAB30, // AB30..AB6F; Latin Extended-E
3994 0xAB70, // AB70..ABBF; Cherokee Supplement
3995 0xABC0, // ABC0..ABFF; Meetei Mayek
3996 0xAC00, // AC00..D7AF; Hangul Syllables
3997 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B
3998 0xD800, // D800..DB7F; High Surrogates
3999 0xDB80, // DB80..DBFF; High Private Use Surrogates
4000 0xDC00, // DC00..DFFF; Low Surrogates
4001 0xE000, // E000..F8FF; Private Use Area
4002 0xF900, // F900..FAFF; CJK Compatibility Ideographs
4003 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms
4004 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A
4005 0xFE00, // FE00..FE0F; Variation Selectors
4006 0xFE10, // FE10..FE1F; Vertical Forms
4007 0xFE20, // FE20..FE2F; Combining Half Marks
4008 0xFE30, // FE30..FE4F; CJK Compatibility Forms
4009 0xFE50, // FE50..FE6F; Small Form Variants
4010 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B
4011 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms
4012 0xFFF0, // FFF0..FFFF; Specials
4013 0x10000, // 10000..1007F; Linear B Syllabary
4014 0x10080, // 10080..100FF; Linear B Ideograms
4015 0x10100, // 10100..1013F; Aegean Numbers
4016 0x10140, // 10140..1018F; Ancient Greek Numbers
4017 0x10190, // 10190..101CF; Ancient Symbols
4018 0x101D0, // 101D0..101FF; Phaistos Disc
4019 0x10200, // unassigned
4020 0x10280, // 10280..1029F; Lycian
4021 0x102A0, // 102A0..102DF; Carian
4022 0x102E0, // 102E0..102FF; Coptic Epact Numbers
4023 0x10300, // 10300..1032F; Old Italic
4024 0x10330, // 10330..1034F; Gothic
4025 0x10350, // 10350..1037F; Old Permic
4026 0x10380, // 10380..1039F; Ugaritic
4027 0x103A0, // 103A0..103DF; Old Persian
4028 0x103E0, // unassigned
4029 0x10400, // 10400..1044F; Deseret
4030 0x10450, // 10450..1047F; Shavian
4031 0x10480, // 10480..104AF; Osmanya
4032 0x104B0, // 104B0..104FF; Osage
4033 0x10500, // 10500..1052F; Elbasan
4034 0x10530, // 10530..1056F; Caucasian Albanian
4035 0x10570, // 10570..105BF; Vithkuqi
4036 0x105C0, // 105C0..105FF; Todhri
4037 0x10600, // 10600..1077F; Linear A
4038 0x10780, // 10780..107BF; Latin Extended-F
4039 0x107C0, // unassigned
4040 0x10800, // 10800..1083F; Cypriot Syllabary
4041 0x10840, // 10840..1085F; Imperial Aramaic
4042 0x10860, // 10860..1087F; Palmyrene
4043 0x10880, // 10880..108AF; Nabataean
4044 0x108B0, // unassigned
4045 0x108E0, // 108E0..108FF; Hatran
4046 0x10900, // 10900..1091F; Phoenician
4047 0x10920, // 10920..1093F; Lydian
4048 0x10940, // 10940..1095F; Sidetic
4049 0x10960, // unassigned
4050 0x10980, // 10980..1099F; Meroitic Hieroglyphs
4051 0x109A0, // 109A0..109FF; Meroitic Cursive
4052 0x10A00, // 10A00..10A5F; Kharoshthi
4053 0x10A60, // 10A60..10A7F; Old South Arabian
4054 0x10A80, // 10A80..10A9F; Old North Arabian
4055 0x10AA0, // unassigned
4056 0x10AC0, // 10AC0..10AFF; Manichaean
4057 0x10B00, // 10B00..10B3F; Avestan
4058 0x10B40, // 10B40..10B5F; Inscriptional Parthian
4059 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi
4060 0x10B80, // 10B80..10BAF; Psalter Pahlavi
4061 0x10BB0, // unassigned
4062 0x10C00, // 10C00..10C4F; Old Turkic
4063 0x10C50, // unassigned
4064 0x10C80, // 10C80..10CFF; Old Hungarian
4065 0x10D00, // 10D00..10D3F; Hanifi Rohingya
4066 0x10D40, // 10D40..10D8F; Garay
4067 0x10D90, // unassigned
4068 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols
4069 0x10E80, // 10E80..10EBF; Yezidi
4070 0x10EC0, // 10EC0..10EFF; Arabic Extended-C
4071 0x10F00, // 10F00..10F2F; Old Sogdian
4072 0x10F30, // 10F30..10F6F; Sogdian
4073 0x10F70, // 10F70..10FAF; Old Uyghur
4074 0x10FB0, // 10FB0..10FDF; Chorasmian
4075 0x10FE0, // 10FE0..10FFF; Elymaic
4076 0x11000, // 11000..1107F; Brahmi
4077 0x11080, // 11080..110CF; Kaithi
4078 0x110D0, // 110D0..110FF; Sora Sompeng
4079 0x11100, // 11100..1114F; Chakma
4080 0x11150, // 11150..1117F; Mahajani
4081 0x11180, // 11180..111DF; Sharada
4082 0x111E0, // 111E0..111FF; Sinhala Archaic Numbers
4083 0x11200, // 11200..1124F; Khojki
4084 0x11250, // unassigned
4085 0x11280, // 11280..112AF; Multani
4086 0x112B0, // 112B0..112FF; Khudawadi
4087 0x11300, // 11300..1137F; Grantha
4088 0x11380, // 11380..113FF; Tulu-Tigalari
4089 0x11400, // 11400..1147F; Newa
4090 0x11480, // 11480..114DF; Tirhuta
4091 0x114E0, // unassigned
4092 0x11580, // 11580..115FF; Siddham
4093 0x11600, // 11600..1165F; Modi
4094 0x11660, // 11660..1167F; Mongolian Supplement
4095 0x11680, // 11680..116CF; Takri
4096 0x116D0, // 116D0..116FF; Myanmar Extended-C
4097 0x11700, // 11700..1174F; Ahom
4098 0x11750, // unassigned
4099 0x11800, // 11800..1184F; Dogra
4100 0x11850, // unassigned
4101 0x118A0, // 118A0..118FF; Warang Citi
4102 0x11900, // 11900..1195F; Dives Akuru
4103 0x11960, // unassigned
4104 0x119A0, // 119A0..119FF; Nandinagari
4105 0x11A00, // 11A00..11A4F; Zanabazar Square
4106 0x11A50, // 11A50..11AAF; Soyombo
4107 0x11AB0, // 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
4108 0x11AC0, // 11AC0..11AFF; Pau Cin Hau
4109 0x11B00, // 11B00..11B5F; Devanagari Extended-A
4110 0x11B60, // 11B60..11B7F; Sharada Supplement
4111 0x11B80, // unassigned
4112 0x11BC0, // 11BC0..11BFF; Sunuwar
4113 0x11C00, // 11C00..11C6F; Bhaiksuki
4114 0x11C70, // 11C70..11CBF; Marchen
4115 0x11CC0, // unassigned
4116 0x11D00, // 11D00..11D5F; Masaram Gondi
4117 0x11D60, // 11D60..11DAF; Gunjala Gondi
4118 0x11DB0, // 11DB0..11DEF; Tolong Siki
4119 0x11DF0, // unassigned
4120 0x11EE0, // 11EE0..11EFF; Makasar
4121 0x11F00, // 11F00..11F5F; Kawi
4122 0x11F60, // unassigned
4123 0x11FB0, // 11FB0..11FBF; Lisu Supplement
4124 0x11FC0, // 11FC0..11FFF; Tamil Supplement
4125 0x12000, // 12000..123FF; Cuneiform
4126 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation
4127 0x12480, // 12480..1254F; Early Dynastic Cuneiform
4128 0x12550, // unassigned
4129 0x12F90, // 12F90..12FFF; Cypro-Minoan
4130 0x13000, // 13000..1342F; Egyptian Hieroglyphs
4131 0x13430, // 13430..1345F; Egyptian Hieroglyph Format Controls
4132 0x13460, // 13460..143FF; Egyptian Hieroglyphs Extended-A
4133 0x14400, // 14400..1467F; Anatolian Hieroglyphs
4134 0x14680, // unassigned
4135 0x16100, // 16100..1613F; Gurung Khema
4136 0x16140, // unassigned
4137 0x16800, // 16800..16A3F; Bamum Supplement
4138 0x16A40, // 16A40..16A6F; Mro
4139 0x16A70, // 16A70..16ACF; Tangsa
4140 0x16AD0, // 16AD0..16AFF; Bassa Vah
4141 0x16B00, // 16B00..16B8F; Pahawh Hmong
4142 0x16B90, // unassigned
4143 0x16D40, // 16D40..16D7F; Kirat Rai
4144 0x16D80, // unassigned
4145 0x16E40, // 16E40..16E9F; Medefaidrin
4146 0x16EA0, // 16EA0..16EDF; Beria Erfe
4147 0x16EE0, // unassigned
4148 0x16F00, // 16F00..16F9F; Miao
4149 0x16FA0, // unassigned
4150 0x16FE0, // 16FE0..16FFF; Ideographic Symbols and Punctuation
4151 0x17000, // 17000..187FF; Tangut
4152 0x18800, // 18800..18AFF; Tangut Components
4153 0x18B00, // 18B00..18CFF; Khitan Small Script
4154 0x18D00, // 18D00..18D7F; Tangut Supplement
4155 0x18D80, // 18D80..18DFF; Tangut Components Supplement
4156 0x18E00, // unassigned
4157 0x1AFF0, // 1AFF0..1AFFF; Kana Extended-B
4158 0x1B000, // 1B000..1B0FF; Kana Supplement
4159 0x1B100, // 1B100..1B12F; Kana Extended-A
4160 0x1B130, // 1B130..1B16F; Small Kana Extension
4161 0x1B170, // 1B170..1B2FF; Nushu
4162 0x1B300, // unassigned
4163 0x1BC00, // 1BC00..1BC9F; Duployan
4164 0x1BCA0, // 1BCA0..1BCAF; Shorthand Format Controls
4165 0x1BCB0, // unassigned
4166 0x1CC00, // 1CC00..1CEBF; Symbols for Legacy Computing Supplement
4167 0x1CEC0, // 1CEC0..1CEFF; Miscellaneous Symbols Supplement
4168 0x1CF00, // 1CF00..1CFCF; Znamenny Musical Notation
4169 0x1CFD0, // unassigned
4170 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols
4171 0x1D100, // 1D100..1D1FF; Musical Symbols
4172 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation
4173 0x1D250, // unassigned
4174 0x1D2C0, // 1D2C0..1D2DF; Kaktovik Numerals
4175 0x1D2E0, // 1D2E0..1D2FF; Mayan Numerals
4176 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols
4177 0x1D360, // 1D360..1D37F; Counting Rod Numerals
4178 0x1D380, // unassigned
4179 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols
4180 0x1D800, // 1D800..1DAAF; Sutton SignWriting
4181 0x1DAB0, // unassigned
4182 0x1DF00, // 1DF00..1DFFF; Latin Extended-G
4183 0x1E000, // 1E000..1E02F; Glagolitic Supplement
4184 0x1E030, // 1E030..1E08F; Cyrillic Extended-D
4185 0x1E090, // unassigned
4186 0x1E100, // 1E100..1E14F; Nyiakeng Puachue Hmong
4187 0x1E150, // unassigned
4188 0x1E290, // 1E290..1E2BF; Toto
4189 0x1E2C0, // 1E2C0..1E2FF; Wancho
4190 0x1E300, // unassigned
4191 0x1E4D0, // 1E4D0..1E4FF; Nag Mundari
4192 0x1E500, // unassigned
4193 0x1E5D0, // 1E5D0..1E5FF; Ol Onal
4194 0x1E600, // unassigned
4195 0x1E6C0, // 1E6C0..1E6FF; Tai Yo
4196 0x1E700, // unassigned
4197 0x1E7E0, // 1E7E0..1E7FF; Ethiopic Extended-B
4198 0x1E800, // 1E800..1E8DF; Mende Kikakui
4199 0x1E8E0, // unassigned
4200 0x1E900, // 1E900..1E95F; Adlam
4201 0x1E960, // unassigned
4202 0x1EC70, // 1EC70..1ECBF; Indic Siyaq Numbers
4203 0x1ECC0, // unassigned
4204 0x1ED00, // 1ED00..1ED4F; Ottoman Siyaq Numbers
4205 0x1ED50, // unassigned
4206 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
4207 0x1EF00, // unassigned
4208 0x1F000, // 1F000..1F02F; Mahjong Tiles
4209 0x1F030, // 1F030..1F09F; Domino Tiles
4210 0x1F0A0, // 1F0A0..1F0FF; Playing Cards
4211 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement
4212 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement
4213 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
4214 0x1F600, // 1F600..1F64F; Emoticons
4215 0x1F650, // 1F650..1F67F; Ornamental Dingbats
4216 0x1F680, // 1F680..1F6FF; Transport and Map Symbols
4217 0x1F700, // 1F700..1F77F; Alchemical Symbols
4218 0x1F780, // 1F780..1F7FF; Geometric Shapes Extended
4219 0x1F800, // 1F800..1F8FF; Supplemental Arrows-C
4220 0x1F900, // 1F900..1F9FF; Supplemental Symbols and Pictographs
4221 0x1FA00, // 1FA00..1FA6F; Chess Symbols
4222 0x1FA70, // 1FA70..1FAFF; Symbols and Pictographs Extended-A
4223 0x1FB00, // 1FB00..1FBFF; Symbols for Legacy Computing
4224 0x1FC00, // unassigned
4225 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B
4226 0x2A6E0, // unassigned
4227 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C
4228 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D
4229 0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E
4230 0x2CEB0, // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
4231 0x2EBF0, // 2EBF0..2EE5F; CJK Unified Ideographs Extension I
4232 0x2EE60, // unassigned
4233 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
4234 0x2FA20, // unassigned
4235 0x30000, // 30000..3134F; CJK Unified Ideographs Extension G
4236 0x31350, // 31350..323AF; CJK Unified Ideographs Extension H
4237 0x323B0, // 323B0..3347F; CJK Unified Ideographs Extension J
4238 0x33480, // unassigned
4239 0xE0000, // E0000..E007F; Tags
4240 0xE0080, // unassigned
4241 0xE0100, // E0100..E01EF; Variation Selectors Supplement
4242 0xE01F0, // unassigned
4243 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A
4244 0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
4245 };
4246
// Lookup table used by of(int): that method binary-searches blockStarts
// and returns the element found at the same index in this array, so the
// two arrays must stay aligned entry for entry and in the same order.
// A null element marks a range that belongs to no block; of(int) returns
// that null to the caller, which is the documented "not a member of any
// Unicode block" result.
private static final UnicodeBlock[] blocks = {
    BASIC_LATIN,
    LATIN_1_SUPPLEMENT,
    LATIN_EXTENDED_A,
    LATIN_EXTENDED_B,
    IPA_EXTENSIONS,
    SPACING_MODIFIER_LETTERS,
    COMBINING_DIACRITICAL_MARKS,
    GREEK,
    CYRILLIC,
    CYRILLIC_SUPPLEMENTARY,
    ARMENIAN,
    HEBREW,
    ARABIC,
    SYRIAC,
    ARABIC_SUPPLEMENT,
    THAANA,
    NKO,
    SAMARITAN,
    MANDAIC,
    SYRIAC_SUPPLEMENT,
    ARABIC_EXTENDED_B,
    ARABIC_EXTENDED_A,
    DEVANAGARI,
    BENGALI,
    GURMUKHI,
    GUJARATI,
    ORIYA,
    TAMIL,
    TELUGU,
    KANNADA,
    MALAYALAM,
    SINHALA,
    THAI,
    LAO,
    TIBETAN,
    MYANMAR,
    GEORGIAN,
    HANGUL_JAMO,
    ETHIOPIC,
    ETHIOPIC_SUPPLEMENT,
    CHEROKEE,
    UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
    OGHAM,
    RUNIC,
    TAGALOG,
    HANUNOO,
    BUHID,
    TAGBANWA,
    KHMER,
    MONGOLIAN,
    UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
    LIMBU,
    TAI_LE,
    NEW_TAI_LUE,
    KHMER_SYMBOLS,
    BUGINESE,
    TAI_THAM,
    COMBINING_DIACRITICAL_MARKS_EXTENDED,
    BALINESE,
    SUNDANESE,
    BATAK,
    LEPCHA,
    OL_CHIKI,
    CYRILLIC_EXTENDED_C,
    GEORGIAN_EXTENDED,
    SUNDANESE_SUPPLEMENT,
    VEDIC_EXTENSIONS,
    PHONETIC_EXTENSIONS,
    PHONETIC_EXTENSIONS_SUPPLEMENT,
    COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
    LATIN_EXTENDED_ADDITIONAL,
    GREEK_EXTENDED,
    GENERAL_PUNCTUATION,
    SUPERSCRIPTS_AND_SUBSCRIPTS,
    CURRENCY_SYMBOLS,
    COMBINING_MARKS_FOR_SYMBOLS,
    LETTERLIKE_SYMBOLS,
    NUMBER_FORMS,
    ARROWS,
    MATHEMATICAL_OPERATORS,
    MISCELLANEOUS_TECHNICAL,
    CONTROL_PICTURES,
    OPTICAL_CHARACTER_RECOGNITION,
    ENCLOSED_ALPHANUMERICS,
    BOX_DRAWING,
    BLOCK_ELEMENTS,
    GEOMETRIC_SHAPES,
    MISCELLANEOUS_SYMBOLS,
    DINGBATS,
    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
    SUPPLEMENTAL_ARROWS_A,
    BRAILLE_PATTERNS,
    SUPPLEMENTAL_ARROWS_B,
    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
    SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
    MISCELLANEOUS_SYMBOLS_AND_ARROWS,
    GLAGOLITIC,
    LATIN_EXTENDED_C,
    COPTIC,
    GEORGIAN_SUPPLEMENT,
    TIFINAGH,
    ETHIOPIC_EXTENDED,
    CYRILLIC_EXTENDED_A,
    SUPPLEMENTAL_PUNCTUATION,
    CJK_RADICALS_SUPPLEMENT,
    KANGXI_RADICALS,
    null,
    IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
    CJK_SYMBOLS_AND_PUNCTUATION,
    HIRAGANA,
    KATAKANA,
    BOPOMOFO,
    HANGUL_COMPATIBILITY_JAMO,
    KANBUN,
    BOPOMOFO_EXTENDED,
    CJK_STROKES,
    KATAKANA_PHONETIC_EXTENSIONS,
    ENCLOSED_CJK_LETTERS_AND_MONTHS,
    CJK_COMPATIBILITY,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
    YIJING_HEXAGRAM_SYMBOLS,
    CJK_UNIFIED_IDEOGRAPHS,
    YI_SYLLABLES,
    YI_RADICALS,
    LISU,
    VAI,
    CYRILLIC_EXTENDED_B,
    BAMUM,
    MODIFIER_TONE_LETTERS,
    LATIN_EXTENDED_D,
    SYLOTI_NAGRI,
    COMMON_INDIC_NUMBER_FORMS,
    PHAGS_PA,
    SAURASHTRA,
    DEVANAGARI_EXTENDED,
    KAYAH_LI,
    REJANG,
    HANGUL_JAMO_EXTENDED_A,
    JAVANESE,
    MYANMAR_EXTENDED_B,
    CHAM,
    MYANMAR_EXTENDED_A,
    TAI_VIET,
    MEETEI_MAYEK_EXTENSIONS,
    ETHIOPIC_EXTENDED_A,
    LATIN_EXTENDED_E,
    CHEROKEE_SUPPLEMENT,
    MEETEI_MAYEK,
    HANGUL_SYLLABLES,
    HANGUL_JAMO_EXTENDED_B,
    HIGH_SURROGATES,
    HIGH_PRIVATE_USE_SURROGATES,
    LOW_SURROGATES,
    PRIVATE_USE_AREA,
    CJK_COMPATIBILITY_IDEOGRAPHS,
    ALPHABETIC_PRESENTATION_FORMS,
    ARABIC_PRESENTATION_FORMS_A,
    VARIATION_SELECTORS,
    VERTICAL_FORMS,
    COMBINING_HALF_MARKS,
    CJK_COMPATIBILITY_FORMS,
    SMALL_FORM_VARIANTS,
    ARABIC_PRESENTATION_FORMS_B,
    HALFWIDTH_AND_FULLWIDTH_FORMS,
    SPECIALS,
    LINEAR_B_SYLLABARY,
    LINEAR_B_IDEOGRAMS,
    AEGEAN_NUMBERS,
    ANCIENT_GREEK_NUMBERS,
    ANCIENT_SYMBOLS,
    PHAISTOS_DISC,
    null,
    LYCIAN,
    CARIAN,
    COPTIC_EPACT_NUMBERS,
    OLD_ITALIC,
    GOTHIC,
    OLD_PERMIC,
    UGARITIC,
    OLD_PERSIAN,
    null,
    DESERET,
    SHAVIAN,
    OSMANYA,
    OSAGE,
    ELBASAN,
    CAUCASIAN_ALBANIAN,
    VITHKUQI,
    TODHRI,
    LINEAR_A,
    LATIN_EXTENDED_F,
    null,
    CYPRIOT_SYLLABARY,
    IMPERIAL_ARAMAIC,
    PALMYRENE,
    NABATAEAN,
    null,
    HATRAN,
    PHOENICIAN,
    LYDIAN,
    SIDETIC,
    null,
    MEROITIC_HIEROGLYPHS,
    MEROITIC_CURSIVE,
    KHAROSHTHI,
    OLD_SOUTH_ARABIAN,
    OLD_NORTH_ARABIAN,
    null,
    MANICHAEAN,
    AVESTAN,
    INSCRIPTIONAL_PARTHIAN,
    INSCRIPTIONAL_PAHLAVI,
    PSALTER_PAHLAVI,
    null,
    OLD_TURKIC,
    null,
    OLD_HUNGARIAN,
    HANIFI_ROHINGYA,
    GARAY,
    null,
    RUMI_NUMERAL_SYMBOLS,
    YEZIDI,
    ARABIC_EXTENDED_C,
    OLD_SOGDIAN,
    SOGDIAN,
    OLD_UYGHUR,
    CHORASMIAN,
    ELYMAIC,
    BRAHMI,
    KAITHI,
    SORA_SOMPENG,
    CHAKMA,
    MAHAJANI,
    SHARADA,
    SINHALA_ARCHAIC_NUMBERS,
    KHOJKI,
    null,
    MULTANI,
    KHUDAWADI,
    GRANTHA,
    TULU_TIGALARI,
    NEWA,
    TIRHUTA,
    null,
    SIDDHAM,
    MODI,
    MONGOLIAN_SUPPLEMENT,
    TAKRI,
    MYANMAR_EXTENDED_C,
    AHOM,
    null,
    DOGRA,
    null,
    WARANG_CITI,
    DIVES_AKURU,
    null,
    NANDINAGARI,
    ZANABAZAR_SQUARE,
    SOYOMBO,
    UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A,
    PAU_CIN_HAU,
    DEVANAGARI_EXTENDED_A,
    SHARADA_SUPPLEMENT,
    null,
    SUNUWAR,
    BHAIKSUKI,
    MARCHEN,
    null,
    MASARAM_GONDI,
    GUNJALA_GONDI,
    TOLONG_SIKI,
    null,
    MAKASAR,
    KAWI,
    null,
    LISU_SUPPLEMENT,
    TAMIL_SUPPLEMENT,
    CUNEIFORM,
    CUNEIFORM_NUMBERS_AND_PUNCTUATION,
    EARLY_DYNASTIC_CUNEIFORM,
    null,
    CYPRO_MINOAN,
    EGYPTIAN_HIEROGLYPHS,
    EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS,
    EGYPTIAN_HIEROGLYPHS_EXTENDED_A,
    ANATOLIAN_HIEROGLYPHS,
    null,
    GURUNG_KHEMA,
    null,
    BAMUM_SUPPLEMENT,
    MRO,
    TANGSA,
    BASSA_VAH,
    PAHAWH_HMONG,
    null,
    KIRAT_RAI,
    null,
    MEDEFAIDRIN,
    BERIA_ERFE,
    null,
    MIAO,
    null,
    IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
    TANGUT,
    TANGUT_COMPONENTS,
    KHITAN_SMALL_SCRIPT,
    TANGUT_SUPPLEMENT,
    TANGUT_COMPONENTS_SUPPLEMENT,
    null,
    KANA_EXTENDED_B,
    KANA_SUPPLEMENT,
    KANA_EXTENDED_A,
    SMALL_KANA_EXTENSION,
    NUSHU,
    null,
    DUPLOYAN,
    SHORTHAND_FORMAT_CONTROLS,
    null,
    SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT,
    MISCELLANEOUS_SYMBOLS_SUPPLEMENT,
    ZNAMENNY_MUSICAL_NOTATION,
    null,
    BYZANTINE_MUSICAL_SYMBOLS,
    MUSICAL_SYMBOLS,
    ANCIENT_GREEK_MUSICAL_NOTATION,
    null,
    KAKTOVIK_NUMERALS,
    MAYAN_NUMERALS,
    TAI_XUAN_JING_SYMBOLS,
    COUNTING_ROD_NUMERALS,
    null,
    MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
    SUTTON_SIGNWRITING,
    null,
    LATIN_EXTENDED_G,
    GLAGOLITIC_SUPPLEMENT,
    CYRILLIC_EXTENDED_D,
    null,
    NYIAKENG_PUACHUE_HMONG,
    null,
    TOTO,
    WANCHO,
    null,
    NAG_MUNDARI,
    null,
    OL_ONAL,
    null,
    TAI_YO,
    null,
    ETHIOPIC_EXTENDED_B,
    MENDE_KIKAKUI,
    null,
    ADLAM,
    null,
    INDIC_SIYAQ_NUMBERS,
    null,
    OTTOMAN_SIYAQ_NUMBERS,
    null,
    ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
    null,
    MAHJONG_TILES,
    DOMINO_TILES,
    PLAYING_CARDS,
    ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
    ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
    MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
    EMOTICONS,
    ORNAMENTAL_DINGBATS,
    TRANSPORT_AND_MAP_SYMBOLS,
    ALCHEMICAL_SYMBOLS,
    GEOMETRIC_SHAPES_EXTENDED,
    SUPPLEMENTAL_ARROWS_C,
    SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
    CHESS_SYMBOLS,
    SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A,
    SYMBOLS_FOR_LEGACY_COMPUTING,
    null,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
    null,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I,
    null,
    CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
    null,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_J,
    null,
    TAGS,
    null,
    VARIATION_SELECTORS_SUPPLEMENT,
    null,
    SUPPLEMENTARY_PRIVATE_USE_AREA_A,
    SUPPLEMENTARY_PRIVATE_USE_AREA_B,
};
4646
4647
4648 /**
4649 * Returns the object representing the Unicode block containing the
4650 * given character, or {@code null} if the character is not a
4651 * member of a defined block.
4652 *
4653 * <p><b>Note:</b> This method cannot handle
4654 * <a href="Character.html#supplementary"> supplementary
4655 * characters</a>. To support all Unicode characters, including
4656 * supplementary characters, use the {@link #of(int)} method.
4657 *
4658 * @param c The character in question
4659 * @return The {@code UnicodeBlock} instance representing the
4660 * Unicode block of which this character is a member, or
4661 * {@code null} if the character is not a member of any
4662 * Unicode block
4663 */
4664 public static UnicodeBlock of(char c) {
4665 return of((int)c);
4666 }
4667
4668 /**
4669 * Returns the object representing the Unicode block
4670 * containing the given character (Unicode code point), or
4671 * {@code null} if the character is not a member of a
4672 * defined block.
4673 *
4674 * @param codePoint the character (Unicode code point) in question.
4675 * @return The {@code UnicodeBlock} instance representing the
4676 * Unicode block of which this character is a member, or
4677 * {@code null} if the character is not a member of any
4678 * Unicode block
4679 * @throws IllegalArgumentException if the specified
4680 * {@code codePoint} is an invalid Unicode code point.
4681 * @see Character#isValidCodePoint(int)
4682 * @since 1.5
4683 */
4684 public static UnicodeBlock of(int codePoint) {
4685 if (!isValidCodePoint(codePoint)) {
4686 throw new IllegalArgumentException(
4687 String.format("Not a valid Unicode code point: 0x%X", codePoint));
4688 }
4689
4690 int top, bottom, current;
4691 bottom = 0;
4692 top = blockStarts.length;
4693 current = top/2;
4694
4695 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
4696 while (top - bottom > 1) {
4697 if (codePoint >= blockStarts[current]) {
4698 bottom = current;
4699 } else {
4700 top = current;
4701 }
4702 current = (top + bottom) / 2;
4703 }
4704 return blocks[current];
4705 }
4706
4707 /**
4708 * Returns the UnicodeBlock with the given name. Block
4709 * names are determined by The Unicode Standard. The file
4710 * {@code Blocks.txt} defines blocks for a particular
4711 * version of the standard. The {@link Character} class specifies
4712 * the version of the standard that it supports.
4713 * <p>
4714 * This method accepts block names in the following forms:
4715 * <ol>
4716 * <li> Canonical block names as defined by the Unicode Standard.
4717 * For example, the standard defines a "Basic Latin" block. Therefore, this
4718 * method accepts "Basic Latin" as a valid block name. The documentation of
4719 * each UnicodeBlock provides the canonical name.
4720 * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
4721 * is a valid block name for the "Basic Latin" block.
4722 * <li>The text representation of each constant UnicodeBlock identifier.
4723 * For example, this method will return the {@link #BASIC_LATIN} block if
4724 * provided with the "BASIC_LATIN" name. This form replaces all spaces and
4725 * hyphens in the canonical name with underscores.
4726 * </ol>
4727 * Finally, character case is ignored for all of the valid block name forms.
4728 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
4729 * The en_US locale's case mapping rules are used to provide case-insensitive
4730 * string comparisons for block name validation.
4731 * <p>
4732 * If the Unicode Standard changes block names, both the previous and
4733 * current names will be accepted.
4734 *
4735 * @param blockName A {@code UnicodeBlock} name.
4736 * @return The {@code UnicodeBlock} instance identified
4737 * by {@code blockName}
4738 * @throws IllegalArgumentException if {@code blockName} is an
4739 * invalid name
4740 * @throws NullPointerException if {@code blockName} is null
4741 * @since 1.5
4742 */
4743 public static final UnicodeBlock forName(String blockName) {
4744 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
4745 if (block == null) {
4746 throw new IllegalArgumentException("Not a valid block name: "
4747 + blockName);
4748 }
4749 return block;
4750 }
4751 }
4752
4753
4754 /**
4755 * A family of character subsets representing the character scripts
4756 * defined in the <a href="http://www.unicode.org/reports/tr24/">
4757 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
4758 * character is assigned to a single Unicode script, either a specific
4759 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
4760 * one of the following three special values,
4761 * {@link Character.UnicodeScript#INHERITED Inherited},
4762 * {@link Character.UnicodeScript#COMMON Common} or
4763 * {@link Character.UnicodeScript#UNKNOWN Unknown}.
4764 *
4765 * @spec https://www.unicode.org/reports/tr24 Unicode Script Property
4766 * @since 1.7
4767 */
4768 public static enum UnicodeScript {
4769
4770 /**
4771 * Unicode script "Common".
4772 */
4773 COMMON,
4774
4775 /**
4776 * Unicode script "Latin".
4777 */
4778 LATIN,
4779
4780 /**
4781 * Unicode script "Greek".
4782 */
4783 GREEK,
4784
4785 /**
4786 * Unicode script "Cyrillic".
4787 */
4788 CYRILLIC,
4789
4790 /**
4791 * Unicode script "Armenian".
4792 */
4793 ARMENIAN,
4794
4795 /**
4796 * Unicode script "Hebrew".
4797 */
4798 HEBREW,
4799
4800 /**
4801 * Unicode script "Arabic".
4802 */
4803 ARABIC,
4804
4805 /**
4806 * Unicode script "Syriac".
4807 */
4808 SYRIAC,
4809
4810 /**
4811 * Unicode script "Thaana".
4812 */
4813 THAANA,
4814
4815 /**
4816 * Unicode script "Devanagari".
4817 */
4818 DEVANAGARI,
4819
4820 /**
4821 * Unicode script "Bengali".
4822 */
4823 BENGALI,
4824
4825 /**
4826 * Unicode script "Gurmukhi".
4827 */
4828 GURMUKHI,
4829
4830 /**
4831 * Unicode script "Gujarati".
4832 */
4833 GUJARATI,
4834
4835 /**
4836 * Unicode script "Oriya".
4837 */
4838 ORIYA,
4839
4840 /**
4841 * Unicode script "Tamil".
4842 */
4843 TAMIL,
4844
4845 /**
4846 * Unicode script "Telugu".
4847 */
4848 TELUGU,
4849
4850 /**
4851 * Unicode script "Kannada".
4852 */
4853 KANNADA,
4854
4855 /**
4856 * Unicode script "Malayalam".
4857 */
4858 MALAYALAM,
4859
4860 /**
4861 * Unicode script "Sinhala".
4862 */
4863 SINHALA,
4864
4865 /**
4866 * Unicode script "Thai".
4867 */
4868 THAI,
4869
4870 /**
4871 * Unicode script "Lao".
4872 */
4873 LAO,
4874
4875 /**
4876 * Unicode script "Tibetan".
4877 */
4878 TIBETAN,
4879
4880 /**
4881 * Unicode script "Myanmar".
4882 */
4883 MYANMAR,
4884
4885 /**
4886 * Unicode script "Georgian".
4887 */
4888 GEORGIAN,
4889
4890 /**
4891 * Unicode script "Hangul".
4892 */
4893 HANGUL,
4894
4895 /**
4896 * Unicode script "Ethiopic".
4897 */
4898 ETHIOPIC,
4899
4900 /**
4901 * Unicode script "Cherokee".
4902 */
4903 CHEROKEE,
4904
4905 /**
4906 * Unicode script "Canadian_Aboriginal".
4907 */
4908 CANADIAN_ABORIGINAL,
4909
4910 /**
4911 * Unicode script "Ogham".
4912 */
4913 OGHAM,
4914
4915 /**
4916 * Unicode script "Runic".
4917 */
4918 RUNIC,
4919
4920 /**
4921 * Unicode script "Khmer".
4922 */
4923 KHMER,
4924
4925 /**
4926 * Unicode script "Mongolian".
4927 */
4928 MONGOLIAN,
4929
4930 /**
4931 * Unicode script "Hiragana".
4932 */
4933 HIRAGANA,
4934
4935 /**
4936 * Unicode script "Katakana".
4937 */
4938 KATAKANA,
4939
4940 /**
4941 * Unicode script "Bopomofo".
4942 */
4943 BOPOMOFO,
4944
4945 /**
4946 * Unicode script "Han".
4947 */
4948 HAN,
4949
4950 /**
4951 * Unicode script "Yi".
4952 */
4953 YI,
4954
4955 /**
4956 * Unicode script "Old_Italic".
4957 */
4958 OLD_ITALIC,
4959
4960 /**
4961 * Unicode script "Gothic".
4962 */
4963 GOTHIC,
4964
4965 /**
4966 * Unicode script "Deseret".
4967 */
4968 DESERET,
4969
4970 /**
4971 * Unicode script "Inherited".
4972 */
4973 INHERITED,
4974
4975 /**
4976 * Unicode script "Tagalog".
4977 */
4978 TAGALOG,
4979
4980 /**
4981 * Unicode script "Hanunoo".
4982 */
4983 HANUNOO,
4984
4985 /**
4986 * Unicode script "Buhid".
4987 */
4988 BUHID,
4989
4990 /**
4991 * Unicode script "Tagbanwa".
4992 */
4993 TAGBANWA,
4994
4995 /**
4996 * Unicode script "Limbu".
4997 */
4998 LIMBU,
4999
5000 /**
5001 * Unicode script "Tai_Le".
5002 */
5003 TAI_LE,
5004
5005 /**
5006 * Unicode script "Linear_B".
5007 */
5008 LINEAR_B,
5009
5010 /**
5011 * Unicode script "Ugaritic".
5012 */
5013 UGARITIC,
5014
5015 /**
5016 * Unicode script "Shavian".
5017 */
5018 SHAVIAN,
5019
5020 /**
5021 * Unicode script "Osmanya".
5022 */
5023 OSMANYA,
5024
5025 /**
5026 * Unicode script "Cypriot".
5027 */
5028 CYPRIOT,
5029
5030 /**
5031 * Unicode script "Braille".
5032 */
5033 BRAILLE,
5034
5035 /**
5036 * Unicode script "Buginese".
5037 */
5038 BUGINESE,
5039
5040 /**
5041 * Unicode script "Coptic".
5042 */
5043 COPTIC,
5044
5045 /**
5046 * Unicode script "New_Tai_Lue".
5047 */
5048 NEW_TAI_LUE,
5049
5050 /**
5051 * Unicode script "Glagolitic".
5052 */
5053 GLAGOLITIC,
5054
5055 /**
5056 * Unicode script "Tifinagh".
5057 */
5058 TIFINAGH,
5059
5060 /**
5061 * Unicode script "Syloti_Nagri".
5062 */
5063 SYLOTI_NAGRI,
5064
5065 /**
5066 * Unicode script "Old_Persian".
5067 */
5068 OLD_PERSIAN,
5069
5070 /**
5071 * Unicode script "Kharoshthi".
5072 */
5073 KHAROSHTHI,
5074
5075 /**
5076 * Unicode script "Balinese".
5077 */
5078 BALINESE,
5079
5080 /**
5081 * Unicode script "Cuneiform".
5082 */
5083 CUNEIFORM,
5084
5085 /**
5086 * Unicode script "Phoenician".
5087 */
5088 PHOENICIAN,
5089
5090 /**
5091 * Unicode script "Phags_Pa".
5092 */
5093 PHAGS_PA,
5094
5095 /**
5096 * Unicode script "Nko".
5097 */
5098 NKO,
5099
5100 /**
5101 * Unicode script "Sundanese".
5102 */
5103 SUNDANESE,
5104
5105 /**
5106 * Unicode script "Batak".
5107 */
5108 BATAK,
5109
5110 /**
5111 * Unicode script "Lepcha".
5112 */
5113 LEPCHA,
5114
5115 /**
5116 * Unicode script "Ol_Chiki".
5117 */
5118 OL_CHIKI,
5119
5120 /**
5121 * Unicode script "Vai".
5122 */
5123 VAI,
5124
5125 /**
5126 * Unicode script "Saurashtra".
5127 */
5128 SAURASHTRA,
5129
5130 /**
5131 * Unicode script "Kayah_Li".
5132 */
5133 KAYAH_LI,
5134
5135 /**
5136 * Unicode script "Rejang".
5137 */
5138 REJANG,
5139
5140 /**
5141 * Unicode script "Lycian".
5142 */
5143 LYCIAN,
5144
5145 /**
5146 * Unicode script "Carian".
5147 */
5148 CARIAN,
5149
5150 /**
5151 * Unicode script "Lydian".
5152 */
5153 LYDIAN,
5154
5155 /**
5156 * Unicode script "Cham".
5157 */
5158 CHAM,
5159
5160 /**
5161 * Unicode script "Tai_Tham".
5162 */
5163 TAI_THAM,
5164
5165 /**
5166 * Unicode script "Tai_Viet".
5167 */
5168 TAI_VIET,
5169
5170 /**
5171 * Unicode script "Avestan".
5172 */
5173 AVESTAN,
5174
5175 /**
5176 * Unicode script "Egyptian_Hieroglyphs".
5177 */
5178 EGYPTIAN_HIEROGLYPHS,
5179
5180 /**
5181 * Unicode script "Samaritan".
5182 */
5183 SAMARITAN,
5184
5185 /**
5186 * Unicode script "Mandaic".
5187 */
5188 MANDAIC,
5189
5190 /**
5191 * Unicode script "Lisu".
5192 */
5193 LISU,
5194
5195 /**
5196 * Unicode script "Bamum".
5197 */
5198 BAMUM,
5199
5200 /**
5201 * Unicode script "Javanese".
5202 */
5203 JAVANESE,
5204
5205 /**
5206 * Unicode script "Meetei_Mayek".
5207 */
5208 MEETEI_MAYEK,
5209
5210 /**
5211 * Unicode script "Imperial_Aramaic".
5212 */
5213 IMPERIAL_ARAMAIC,
5214
5215 /**
5216 * Unicode script "Old_South_Arabian".
5217 */
5218 OLD_SOUTH_ARABIAN,
5219
5220 /**
5221 * Unicode script "Inscriptional_Parthian".
5222 */
5223 INSCRIPTIONAL_PARTHIAN,
5224
5225 /**
5226 * Unicode script "Inscriptional_Pahlavi".
5227 */
5228 INSCRIPTIONAL_PAHLAVI,
5229
5230 /**
5231 * Unicode script "Old_Turkic".
5232 */
5233 OLD_TURKIC,
5234
5235 /**
5236 * Unicode script "Brahmi".
5237 */
5238 BRAHMI,
5239
5240 /**
5241 * Unicode script "Kaithi".
5242 */
5243 KAITHI,
5244
5245 /**
5246 * Unicode script "Meroitic Hieroglyphs".
5247 * @since 1.8
5248 */
5249 MEROITIC_HIEROGLYPHS,
5250
5251 /**
5252 * Unicode script "Meroitic Cursive".
5253 * @since 1.8
5254 */
5255 MEROITIC_CURSIVE,
5256
5257 /**
5258 * Unicode script "Sora Sompeng".
5259 * @since 1.8
5260 */
5261 SORA_SOMPENG,
5262
5263 /**
5264 * Unicode script "Chakma".
5265 * @since 1.8
5266 */
5267 CHAKMA,
5268
5269 /**
5270 * Unicode script "Sharada".
5271 * @since 1.8
5272 */
5273 SHARADA,
5274
5275 /**
5276 * Unicode script "Takri".
5277 * @since 1.8
5278 */
5279 TAKRI,
5280
5281 /**
5282 * Unicode script "Miao".
5283 * @since 1.8
5284 */
5285 MIAO,
5286
5287 /**
5288 * Unicode script "Caucasian Albanian".
5289 * @since 9
5290 */
5291 CAUCASIAN_ALBANIAN,
5292
5293 /**
5294 * Unicode script "Bassa Vah".
5295 * @since 9
5296 */
5297 BASSA_VAH,
5298
5299 /**
5300 * Unicode script "Duployan".
5301 * @since 9
5302 */
5303 DUPLOYAN,
5304
5305 /**
5306 * Unicode script "Elbasan".
5307 * @since 9
5308 */
5309 ELBASAN,
5310
5311 /**
5312 * Unicode script "Grantha".
5313 * @since 9
5314 */
5315 GRANTHA,
5316
5317 /**
5318 * Unicode script "Pahawh Hmong".
5319 * @since 9
5320 */
5321 PAHAWH_HMONG,
5322
5323 /**
5324 * Unicode script "Khojki".
5325 * @since 9
5326 */
5327 KHOJKI,
5328
5329 /**
5330 * Unicode script "Linear A".
5331 * @since 9
5332 */
5333 LINEAR_A,
5334
5335 /**
5336 * Unicode script "Mahajani".
5337 * @since 9
5338 */
5339 MAHAJANI,
5340
5341 /**
5342 * Unicode script "Manichaean".
5343 * @since 9
5344 */
5345 MANICHAEAN,
5346
5347 /**
5348 * Unicode script "Mende Kikakui".
5349 * @since 9
5350 */
5351 MENDE_KIKAKUI,
5352
5353 /**
5354 * Unicode script "Modi".
5355 * @since 9
5356 */
5357 MODI,
5358
5359 /**
5360 * Unicode script "Mro".
5361 * @since 9
5362 */
5363 MRO,
5364
5365 /**
5366 * Unicode script "Old North Arabian".
5367 * @since 9
5368 */
5369 OLD_NORTH_ARABIAN,
5370
5371 /**
5372 * Unicode script "Nabataean".
5373 * @since 9
5374 */
5375 NABATAEAN,
5376
5377 /**
5378 * Unicode script "Palmyrene".
5379 * @since 9
5380 */
5381 PALMYRENE,
5382
5383 /**
5384 * Unicode script "Pau Cin Hau".
5385 * @since 9
5386 */
5387 PAU_CIN_HAU,
5388
5389 /**
5390 * Unicode script "Old Permic".
5391 * @since 9
5392 */
5393 OLD_PERMIC,
5394
5395 /**
5396 * Unicode script "Psalter Pahlavi".
5397 * @since 9
5398 */
5399 PSALTER_PAHLAVI,
5400
5401 /**
5402 * Unicode script "Siddham".
5403 * @since 9
5404 */
5405 SIDDHAM,
5406
5407 /**
5408 * Unicode script "Khudawadi".
5409 * @since 9
5410 */
5411 KHUDAWADI,
5412
5413 /**
5414 * Unicode script "Tirhuta".
5415 * @since 9
5416 */
5417 TIRHUTA,
5418
5419 /**
5420 * Unicode script "Warang Citi".
5421 * @since 9
5422 */
5423 WARANG_CITI,
5424
5425 /**
5426 * Unicode script "Ahom".
5427 * @since 9
5428 */
5429 AHOM,
5430
5431 /**
5432 * Unicode script "Anatolian Hieroglyphs".
5433 * @since 9
5434 */
5435 ANATOLIAN_HIEROGLYPHS,
5436
5437 /**
5438 * Unicode script "Hatran".
5439 * @since 9
5440 */
5441 HATRAN,
5442
5443 /**
5444 * Unicode script "Multani".
5445 * @since 9
5446 */
5447 MULTANI,
5448
5449 /**
5450 * Unicode script "Old Hungarian".
5451 * @since 9
5452 */
5453 OLD_HUNGARIAN,
5454
5455 /**
5456 * Unicode script "SignWriting".
5457 * @since 9
5458 */
5459 SIGNWRITING,
5460
5461 /**
5462 * Unicode script "Adlam".
5463 * @since 11
5464 */
5465 ADLAM,
5466
5467 /**
5468 * Unicode script "Bhaiksuki".
5469 * @since 11
5470 */
5471 BHAIKSUKI,
5472
5473 /**
5474 * Unicode script "Marchen".
5475 * @since 11
5476 */
5477 MARCHEN,
5478
5479 /**
5480 * Unicode script "Newa".
5481 * @since 11
5482 */
5483 NEWA,
5484
5485 /**
5486 * Unicode script "Osage".
5487 * @since 11
5488 */
5489 OSAGE,
5490
5491 /**
5492 * Unicode script "Tangut".
5493 * @since 11
5494 */
5495 TANGUT,
5496
5497 /**
5498 * Unicode script "Masaram Gondi".
5499 * @since 11
5500 */
5501 MASARAM_GONDI,
5502
5503 /**
5504 * Unicode script "Nushu".
5505 * @since 11
5506 */
5507 NUSHU,
5508
5509 /**
5510 * Unicode script "Soyombo".
5511 * @since 11
5512 */
5513 SOYOMBO,
5514
5515 /**
5516 * Unicode script "Zanabazar Square".
5517 * @since 11
5518 */
5519 ZANABAZAR_SQUARE,
5520
5521 /**
5522 * Unicode script "Hanifi Rohingya".
5523 * @since 12
5524 */
5525 HANIFI_ROHINGYA,
5526
5527 /**
5528 * Unicode script "Old Sogdian".
5529 * @since 12
5530 */
5531 OLD_SOGDIAN,
5532
5533 /**
5534 * Unicode script "Sogdian".
5535 * @since 12
5536 */
5537 SOGDIAN,
5538
5539 /**
5540 * Unicode script "Dogra".
5541 * @since 12
5542 */
5543 DOGRA,
5544
5545 /**
5546 * Unicode script "Gunjala Gondi".
5547 * @since 12
5548 */
5549 GUNJALA_GONDI,
5550
5551 /**
5552 * Unicode script "Makasar".
5553 * @since 12
5554 */
5555 MAKASAR,
5556
5557 /**
5558 * Unicode script "Medefaidrin".
5559 * @since 12
5560 */
5561 MEDEFAIDRIN,
5562
5563 /**
5564 * Unicode script "Elymaic".
5565 * @since 13
5566 */
5567 ELYMAIC,
5568
5569 /**
5570 * Unicode script "Nandinagari".
5571 * @since 13
5572 */
5573 NANDINAGARI,
5574
5575 /**
5576 * Unicode script "Nyiakeng Puachue Hmong".
5577 * @since 13
5578 */
5579 NYIAKENG_PUACHUE_HMONG,
5580
5581 /**
5582 * Unicode script "Wancho".
5583 * @since 13
5584 */
5585 WANCHO,
5586
5587 /**
5588 * Unicode script "Yezidi".
5589 * @since 15
5590 */
5591 YEZIDI,
5592
5593 /**
5594 * Unicode script "Chorasmian".
5595 * @since 15
5596 */
5597 CHORASMIAN,
5598
5599 /**
5600 * Unicode script "Dives Akuru".
5601 * @since 15
5602 */
5603 DIVES_AKURU,
5604
5605 /**
5606 * Unicode script "Khitan Small Script".
5607 * @since 15
5608 */
5609 KHITAN_SMALL_SCRIPT,
5610
5611 /**
5612 * Unicode script "Vithkuqi".
5613 * @since 19
5614 */
5615 VITHKUQI,
5616
5617 /**
5618 * Unicode script "Old Uyghur".
5619 * @since 19
5620 */
5621 OLD_UYGHUR,
5622
5623 /**
5624 * Unicode script "Cypro Minoan".
5625 * @since 19
5626 */
5627 CYPRO_MINOAN,
5628
5629 /**
5630 * Unicode script "Tangsa".
5631 * @since 19
5632 */
5633 TANGSA,
5634
5635 /**
5636 * Unicode script "Toto".
5637 * @since 19
5638 */
5639 TOTO,
5640
5641 /**
5642 * Unicode script "Kawi".
5643 * @since 20
5644 */
5645 KAWI,
5646
5647 /**
5648 * Unicode script "Nag Mundari".
5649 * @since 20
5650 */
5651 NAG_MUNDARI,
5652
5653 /**
5654 * Unicode script "Todhri".
5655 * @since 24
5656 */
5657 TODHRI,
5658
5659 /**
5660 * Unicode script "Garay".
5661 * @since 24
5662 */
5663 GARAY,
5664
5665 /**
5666 * Unicode script "Tulu Tigalari".
5667 * @since 24
5668 */
5669 TULU_TIGALARI,
5670
5671 /**
5672 * Unicode script "Sunuwar".
5673 * @since 24
5674 */
5675 SUNUWAR,
5676
5677 /**
5678 * Unicode script "Gurung Khema".
5679 * @since 24
5680 */
5681 GURUNG_KHEMA,
5682
5683 /**
5684 * Unicode script "Kirat Rai".
5685 * @since 24
5686 */
5687 KIRAT_RAI,
5688
5689 /**
5690 * Unicode script "Ol Onal".
5691 * @since 24
5692 */
5693 OL_ONAL,
5694
5695 /**
5696 * Unicode script "Sidetic".
5697 * @since 26
5698 */
5699 SIDETIC,
5700
5701 /**
5702 * Unicode script "Tolong Siki".
5703 * @since 26
5704 */
5705 TOLONG_SIKI,
5706
5707 /**
5708 * Unicode script "Beria Erfe".
5709 * @since 26
5710 */
5711 BERIA_ERFE,
5712
5713 /**
5714 * Unicode script "Tai Yo".
5715 * @since 26
5716 */
5717 TAI_YO,
5718
5719 /**
5720 * Unicode script "Unknown".
5721 */
5722 UNKNOWN; // must be the last enum constant for calculating the size of "aliases" hash map.
5723
5724 private static final int[] scriptStarts = {
5725 0x0000, // 0000..0040; COMMON
5726 0x0041, // 0041..005A; LATIN
5727 0x005B, // 005B..0060; COMMON
5728 0x0061, // 0061..007A; LATIN
5729 0x007B, // 007B..00A9; COMMON
5730 0x00AA, // 00AA ; LATIN
5731 0x00AB, // 00AB..00B9; COMMON
5732 0x00BA, // 00BA ; LATIN
5733 0x00BB, // 00BB..00BF; COMMON
5734 0x00C0, // 00C0..00D6; LATIN
5735 0x00D7, // 00D7 ; COMMON
5736 0x00D8, // 00D8..00F6; LATIN
5737 0x00F7, // 00F7 ; COMMON
5738 0x00F8, // 00F8..02B8; LATIN
5739 0x02B9, // 02B9..02DF; COMMON
5740 0x02E0, // 02E0..02E4; LATIN
5741 0x02E5, // 02E5..02E9; COMMON
5742 0x02EA, // 02EA..02EB; BOPOMOFO
5743 0x02EC, // 02EC..02FF; COMMON
5744 0x0300, // 0300..036F; INHERITED
5745 0x0370, // 0370..0373; GREEK
5746 0x0374, // 0374 ; COMMON
5747 0x0375, // 0375..0377; GREEK
5748 0x0378, // 0378..0379; UNKNOWN
5749 0x037A, // 037A..037D; GREEK
5750 0x037E, // 037E ; COMMON
5751 0x037F, // 037F ; GREEK
5752 0x0380, // 0380..0383; UNKNOWN
5753 0x0384, // 0384 ; GREEK
5754 0x0385, // 0385 ; COMMON
5755 0x0386, // 0386 ; GREEK
5756 0x0387, // 0387 ; COMMON
5757 0x0388, // 0388..038A; GREEK
5758 0x038B, // 038B ; UNKNOWN
5759 0x038C, // 038C ; GREEK
5760 0x038D, // 038D ; UNKNOWN
5761 0x038E, // 038E..03A1; GREEK
5762 0x03A2, // 03A2 ; UNKNOWN
5763 0x03A3, // 03A3..03E1; GREEK
5764 0x03E2, // 03E2..03EF; COPTIC
5765 0x03F0, // 03F0..03FF; GREEK
5766 0x0400, // 0400..0484; CYRILLIC
5767 0x0485, // 0485..0486; INHERITED
5768 0x0487, // 0487..052F; CYRILLIC
5769 0x0530, // 0530 ; UNKNOWN
5770 0x0531, // 0531..0556; ARMENIAN
5771 0x0557, // 0557..0558; UNKNOWN
5772 0x0559, // 0559..058A; ARMENIAN
5773 0x058B, // 058B..058C; UNKNOWN
5774 0x058D, // 058D..058F; ARMENIAN
5775 0x0590, // 0590 ; UNKNOWN
5776 0x0591, // 0591..05C7; HEBREW
5777 0x05C8, // 05C8..05CF; UNKNOWN
5778 0x05D0, // 05D0..05EA; HEBREW
5779 0x05EB, // 05EB..05EE; UNKNOWN
5780 0x05EF, // 05EF..05F4; HEBREW
5781 0x05F5, // 05F5..05FF; UNKNOWN
5782 0x0600, // 0600..0604; ARABIC
5783 0x0605, // 0605 ; COMMON
5784 0x0606, // 0606..060B; ARABIC
5785 0x060C, // 060C ; COMMON
5786 0x060D, // 060D..061A; ARABIC
5787 0x061B, // 061B ; COMMON
5788 0x061C, // 061C..061E; ARABIC
5789 0x061F, // 061F ; COMMON
5790 0x0620, // 0620..063F; ARABIC
5791 0x0640, // 0640 ; COMMON
5792 0x0641, // 0641..064A; ARABIC
5793 0x064B, // 064B..0655; INHERITED
5794 0x0656, // 0656..066F; ARABIC
5795 0x0670, // 0670 ; INHERITED
5796 0x0671, // 0671..06DC; ARABIC
5797 0x06DD, // 06DD ; COMMON
5798 0x06DE, // 06DE..06FF; ARABIC
5799 0x0700, // 0700..070D; SYRIAC
5800 0x070E, // 070E ; UNKNOWN
5801 0x070F, // 070F..074A; SYRIAC
5802 0x074B, // 074B..074C; UNKNOWN
5803 0x074D, // 074D..074F; SYRIAC
5804 0x0750, // 0750..077F; ARABIC
5805 0x0780, // 0780..07B1; THAANA
5806 0x07B2, // 07B2..07BF; UNKNOWN
5807 0x07C0, // 07C0..07FA; NKO
5808 0x07FB, // 07FB..07FC; UNKNOWN
5809 0x07FD, // 07FD..07FF; NKO
5810 0x0800, // 0800..082D; SAMARITAN
5811 0x082E, // 082E..082F; UNKNOWN
5812 0x0830, // 0830..083E; SAMARITAN
5813 0x083F, // 083F ; UNKNOWN
5814 0x0840, // 0840..085B; MANDAIC
5815 0x085C, // 085C..085D; UNKNOWN
5816 0x085E, // 085E ; MANDAIC
5817 0x085F, // 085F ; UNKNOWN
5818 0x0860, // 0860..086A; SYRIAC
5819 0x086B, // 086B..086F; UNKNOWN
5820 0x0870, // 0870..0891; ARABIC
5821 0x0892, // 0892..0896; UNKNOWN
5822 0x0897, // 0897..08E1; ARABIC
5823 0x08E2, // 08E2 ; COMMON
5824 0x08E3, // 08E3..08FF; ARABIC
5825 0x0900, // 0900..0950; DEVANAGARI
5826 0x0951, // 0951..0954; INHERITED
5827 0x0955, // 0955..0963; DEVANAGARI
5828 0x0964, // 0964..0965; COMMON
5829 0x0966, // 0966..097F; DEVANAGARI
5830 0x0980, // 0980..0983; BENGALI
5831 0x0984, // 0984 ; UNKNOWN
5832 0x0985, // 0985..098C; BENGALI
5833 0x098D, // 098D..098E; UNKNOWN
5834 0x098F, // 098F..0990; BENGALI
5835 0x0991, // 0991..0992; UNKNOWN
5836 0x0993, // 0993..09A8; BENGALI
5837 0x09A9, // 09A9 ; UNKNOWN
5838 0x09AA, // 09AA..09B0; BENGALI
5839 0x09B1, // 09B1 ; UNKNOWN
5840 0x09B2, // 09B2 ; BENGALI
5841 0x09B3, // 09B3..09B5; UNKNOWN
5842 0x09B6, // 09B6..09B9; BENGALI
5843 0x09BA, // 09BA..09BB; UNKNOWN
5844 0x09BC, // 09BC..09C4; BENGALI
5845 0x09C5, // 09C5..09C6; UNKNOWN
5846 0x09C7, // 09C7..09C8; BENGALI
5847 0x09C9, // 09C9..09CA; UNKNOWN
5848 0x09CB, // 09CB..09CE; BENGALI
5849 0x09CF, // 09CF..09D6; UNKNOWN
5850 0x09D7, // 09D7 ; BENGALI
5851 0x09D8, // 09D8..09DB; UNKNOWN
5852 0x09DC, // 09DC..09DD; BENGALI
5853 0x09DE, // 09DE ; UNKNOWN
5854 0x09DF, // 09DF..09E3; BENGALI
5855 0x09E4, // 09E4..09E5; UNKNOWN
5856 0x09E6, // 09E6..09FE; BENGALI
5857 0x09FF, // 09FF..0A00; UNKNOWN
5858 0x0A01, // 0A01..0A03; GURMUKHI
5859 0x0A04, // 0A04 ; UNKNOWN
5860 0x0A05, // 0A05..0A0A; GURMUKHI
5861 0x0A0B, // 0A0B..0A0E; UNKNOWN
5862 0x0A0F, // 0A0F..0A10; GURMUKHI
5863 0x0A11, // 0A11..0A12; UNKNOWN
5864 0x0A13, // 0A13..0A28; GURMUKHI
5865 0x0A29, // 0A29 ; UNKNOWN
5866 0x0A2A, // 0A2A..0A30; GURMUKHI
5867 0x0A31, // 0A31 ; UNKNOWN
5868 0x0A32, // 0A32..0A33; GURMUKHI
5869 0x0A34, // 0A34 ; UNKNOWN
5870 0x0A35, // 0A35..0A36; GURMUKHI
5871 0x0A37, // 0A37 ; UNKNOWN
5872 0x0A38, // 0A38..0A39; GURMUKHI
5873 0x0A3A, // 0A3A..0A3B; UNKNOWN
5874 0x0A3C, // 0A3C ; GURMUKHI
5875 0x0A3D, // 0A3D ; UNKNOWN
5876 0x0A3E, // 0A3E..0A42; GURMUKHI
5877 0x0A43, // 0A43..0A46; UNKNOWN
5878 0x0A47, // 0A47..0A48; GURMUKHI
5879 0x0A49, // 0A49..0A4A; UNKNOWN
5880 0x0A4B, // 0A4B..0A4D; GURMUKHI
5881 0x0A4E, // 0A4E..0A50; UNKNOWN
5882 0x0A51, // 0A51 ; GURMUKHI
5883 0x0A52, // 0A52..0A58; UNKNOWN
5884 0x0A59, // 0A59..0A5C; GURMUKHI
5885 0x0A5D, // 0A5D ; UNKNOWN
5886 0x0A5E, // 0A5E ; GURMUKHI
5887 0x0A5F, // 0A5F..0A65; UNKNOWN
5888 0x0A66, // 0A66..0A76; GURMUKHI
5889 0x0A77, // 0A77..0A80; UNKNOWN
5890 0x0A81, // 0A81..0A83; GUJARATI
5891 0x0A84, // 0A84 ; UNKNOWN
5892 0x0A85, // 0A85..0A8D; GUJARATI
5893 0x0A8E, // 0A8E ; UNKNOWN
5894 0x0A8F, // 0A8F..0A91; GUJARATI
5895 0x0A92, // 0A92 ; UNKNOWN
5896 0x0A93, // 0A93..0AA8; GUJARATI
5897 0x0AA9, // 0AA9 ; UNKNOWN
5898 0x0AAA, // 0AAA..0AB0; GUJARATI
5899 0x0AB1, // 0AB1 ; UNKNOWN
5900 0x0AB2, // 0AB2..0AB3; GUJARATI
5901 0x0AB4, // 0AB4 ; UNKNOWN
5902 0x0AB5, // 0AB5..0AB9; GUJARATI
5903 0x0ABA, // 0ABA..0ABB; UNKNOWN
5904 0x0ABC, // 0ABC..0AC5; GUJARATI
5905 0x0AC6, // 0AC6 ; UNKNOWN
5906 0x0AC7, // 0AC7..0AC9; GUJARATI
5907 0x0ACA, // 0ACA ; UNKNOWN
5908 0x0ACB, // 0ACB..0ACD; GUJARATI
5909 0x0ACE, // 0ACE..0ACF; UNKNOWN
5910 0x0AD0, // 0AD0 ; GUJARATI
5911 0x0AD1, // 0AD1..0ADF; UNKNOWN
5912 0x0AE0, // 0AE0..0AE3; GUJARATI
5913 0x0AE4, // 0AE4..0AE5; UNKNOWN
5914 0x0AE6, // 0AE6..0AF1; GUJARATI
5915 0x0AF2, // 0AF2..0AF8; UNKNOWN
5916 0x0AF9, // 0AF9..0AFF; GUJARATI
5917 0x0B00, // 0B00 ; UNKNOWN
5918 0x0B01, // 0B01..0B03; ORIYA
5919 0x0B04, // 0B04 ; UNKNOWN
5920 0x0B05, // 0B05..0B0C; ORIYA
5921 0x0B0D, // 0B0D..0B0E; UNKNOWN
5922 0x0B0F, // 0B0F..0B10; ORIYA
5923 0x0B11, // 0B11..0B12; UNKNOWN
5924 0x0B13, // 0B13..0B28; ORIYA
5925 0x0B29, // 0B29 ; UNKNOWN
5926 0x0B2A, // 0B2A..0B30; ORIYA
5927 0x0B31, // 0B31 ; UNKNOWN
5928 0x0B32, // 0B32..0B33; ORIYA
5929 0x0B34, // 0B34 ; UNKNOWN
5930 0x0B35, // 0B35..0B39; ORIYA
5931 0x0B3A, // 0B3A..0B3B; UNKNOWN
5932 0x0B3C, // 0B3C..0B44; ORIYA
5933 0x0B45, // 0B45..0B46; UNKNOWN
5934 0x0B47, // 0B47..0B48; ORIYA
5935 0x0B49, // 0B49..0B4A; UNKNOWN
5936 0x0B4B, // 0B4B..0B4D; ORIYA
5937 0x0B4E, // 0B4E..0B54; UNKNOWN
5938 0x0B55, // 0B55..0B57; ORIYA
5939 0x0B58, // 0B58..0B5B; UNKNOWN
5940 0x0B5C, // 0B5C..0B5D; ORIYA
5941 0x0B5E, // 0B5E ; UNKNOWN
5942 0x0B5F, // 0B5F..0B63; ORIYA
5943 0x0B64, // 0B64..0B65; UNKNOWN
5944 0x0B66, // 0B66..0B77; ORIYA
5945 0x0B78, // 0B78..0B81; UNKNOWN
5946 0x0B82, // 0B82..0B83; TAMIL
5947 0x0B84, // 0B84 ; UNKNOWN
5948 0x0B85, // 0B85..0B8A; TAMIL
5949 0x0B8B, // 0B8B..0B8D; UNKNOWN
5950 0x0B8E, // 0B8E..0B90; TAMIL
5951 0x0B91, // 0B91 ; UNKNOWN
5952 0x0B92, // 0B92..0B95; TAMIL
5953 0x0B96, // 0B96..0B98; UNKNOWN
5954 0x0B99, // 0B99..0B9A; TAMIL
5955 0x0B9B, // 0B9B ; UNKNOWN
5956 0x0B9C, // 0B9C ; TAMIL
5957 0x0B9D, // 0B9D ; UNKNOWN
5958 0x0B9E, // 0B9E..0B9F; TAMIL
5959 0x0BA0, // 0BA0..0BA2; UNKNOWN
5960 0x0BA3, // 0BA3..0BA4; TAMIL
5961 0x0BA5, // 0BA5..0BA7; UNKNOWN
5962 0x0BA8, // 0BA8..0BAA; TAMIL
5963 0x0BAB, // 0BAB..0BAD; UNKNOWN
5964 0x0BAE, // 0BAE..0BB9; TAMIL
5965 0x0BBA, // 0BBA..0BBD; UNKNOWN
5966 0x0BBE, // 0BBE..0BC2; TAMIL
5967 0x0BC3, // 0BC3..0BC5; UNKNOWN
5968 0x0BC6, // 0BC6..0BC8; TAMIL
5969 0x0BC9, // 0BC9 ; UNKNOWN
5970 0x0BCA, // 0BCA..0BCD; TAMIL
5971 0x0BCE, // 0BCE..0BCF; UNKNOWN
5972 0x0BD0, // 0BD0 ; TAMIL
5973 0x0BD1, // 0BD1..0BD6; UNKNOWN
5974 0x0BD7, // 0BD7 ; TAMIL
5975 0x0BD8, // 0BD8..0BE5; UNKNOWN
5976 0x0BE6, // 0BE6..0BFA; TAMIL
5977 0x0BFB, // 0BFB..0BFF; UNKNOWN
5978 0x0C00, // 0C00..0C0C; TELUGU
5979 0x0C0D, // 0C0D ; UNKNOWN
5980 0x0C0E, // 0C0E..0C10; TELUGU
5981 0x0C11, // 0C11 ; UNKNOWN
5982 0x0C12, // 0C12..0C28; TELUGU
5983 0x0C29, // 0C29 ; UNKNOWN
5984 0x0C2A, // 0C2A..0C39; TELUGU
5985 0x0C3A, // 0C3A..0C3B; UNKNOWN
5986 0x0C3C, // 0C3C..0C44; TELUGU
5987 0x0C45, // 0C45 ; UNKNOWN
5988 0x0C46, // 0C46..0C48; TELUGU
5989 0x0C49, // 0C49 ; UNKNOWN
5990 0x0C4A, // 0C4A..0C4D; TELUGU
5991 0x0C4E, // 0C4E..0C54; UNKNOWN
5992 0x0C55, // 0C55..0C56; TELUGU
5993 0x0C57, // 0C57 ; UNKNOWN
5994 0x0C58, // 0C58..0C5A; TELUGU
5995 0x0C5B, // 0C5B ; UNKNOWN
5996 0x0C5C, // 0C5C..0C5D; TELUGU
5997 0x0C5E, // 0C5E..0C5F; UNKNOWN
5998 0x0C60, // 0C60..0C63; TELUGU
5999 0x0C64, // 0C64..0C65; UNKNOWN
6000 0x0C66, // 0C66..0C6F; TELUGU
6001 0x0C70, // 0C70..0C76; UNKNOWN
6002 0x0C77, // 0C77..0C7F; TELUGU
6003 0x0C80, // 0C80..0C8C; KANNADA
6004 0x0C8D, // 0C8D ; UNKNOWN
6005 0x0C8E, // 0C8E..0C90; KANNADA
6006 0x0C91, // 0C91 ; UNKNOWN
6007 0x0C92, // 0C92..0CA8; KANNADA
6008 0x0CA9, // 0CA9 ; UNKNOWN
6009 0x0CAA, // 0CAA..0CB3; KANNADA
6010 0x0CB4, // 0CB4 ; UNKNOWN
6011 0x0CB5, // 0CB5..0CB9; KANNADA
6012 0x0CBA, // 0CBA..0CBB; UNKNOWN
6013 0x0CBC, // 0CBC..0CC4; KANNADA
6014 0x0CC5, // 0CC5 ; UNKNOWN
6015 0x0CC6, // 0CC6..0CC8; KANNADA
6016 0x0CC9, // 0CC9 ; UNKNOWN
6017 0x0CCA, // 0CCA..0CCD; KANNADA
6018 0x0CCE, // 0CCE..0CD4; UNKNOWN
6019 0x0CD5, // 0CD5..0CD6; KANNADA
6020 0x0CD7, // 0CD7..0CDB; UNKNOWN
6021 0x0CDC, // 0CDC..0CDE; KANNADA
6022 0x0CDF, // 0CDF ; UNKNOWN
6023 0x0CE0, // 0CE0..0CE3; KANNADA
6024 0x0CE4, // 0CE4..0CE5; UNKNOWN
6025 0x0CE6, // 0CE6..0CEF; KANNADA
6026 0x0CF0, // 0CF0 ; UNKNOWN
6027 0x0CF1, // 0CF1..0CF3; KANNADA
6028 0x0CF4, // 0CF4..0CFF; UNKNOWN
6029 0x0D00, // 0D00..0D0C; MALAYALAM
6030 0x0D0D, // 0D0D ; UNKNOWN
6031 0x0D0E, // 0D0E..0D10; MALAYALAM
6032 0x0D11, // 0D11 ; UNKNOWN
6033 0x0D12, // 0D12..0D44; MALAYALAM
6034 0x0D45, // 0D45 ; UNKNOWN
6035 0x0D46, // 0D46..0D48; MALAYALAM
6036 0x0D49, // 0D49 ; UNKNOWN
6037 0x0D4A, // 0D4A..0D4F; MALAYALAM
6038 0x0D50, // 0D50..0D53; UNKNOWN
6039 0x0D54, // 0D54..0D63; MALAYALAM
6040 0x0D64, // 0D64..0D65; UNKNOWN
6041 0x0D66, // 0D66..0D7F; MALAYALAM
6042 0x0D80, // 0D80 ; UNKNOWN
6043 0x0D81, // 0D81..0D83; SINHALA
6044 0x0D84, // 0D84 ; UNKNOWN
6045 0x0D85, // 0D85..0D96; SINHALA
6046 0x0D97, // 0D97..0D99; UNKNOWN
6047 0x0D9A, // 0D9A..0DB1; SINHALA
6048 0x0DB2, // 0DB2 ; UNKNOWN
6049 0x0DB3, // 0DB3..0DBB; SINHALA
6050 0x0DBC, // 0DBC ; UNKNOWN
6051 0x0DBD, // 0DBD ; SINHALA
6052 0x0DBE, // 0DBE..0DBF; UNKNOWN
6053 0x0DC0, // 0DC0..0DC6; SINHALA
6054 0x0DC7, // 0DC7..0DC9; UNKNOWN
6055 0x0DCA, // 0DCA ; SINHALA
6056 0x0DCB, // 0DCB..0DCE; UNKNOWN
6057 0x0DCF, // 0DCF..0DD4; SINHALA
6058 0x0DD5, // 0DD5 ; UNKNOWN
6059 0x0DD6, // 0DD6 ; SINHALA
6060 0x0DD7, // 0DD7 ; UNKNOWN
6061 0x0DD8, // 0DD8..0DDF; SINHALA
6062 0x0DE0, // 0DE0..0DE5; UNKNOWN
6063 0x0DE6, // 0DE6..0DEF; SINHALA
6064 0x0DF0, // 0DF0..0DF1; UNKNOWN
6065 0x0DF2, // 0DF2..0DF4; SINHALA
6066 0x0DF5, // 0DF5..0E00; UNKNOWN
6067 0x0E01, // 0E01..0E3A; THAI
6068 0x0E3B, // 0E3B..0E3E; UNKNOWN
6069 0x0E3F, // 0E3F ; COMMON
6070 0x0E40, // 0E40..0E5B; THAI
6071 0x0E5C, // 0E5C..0E80; UNKNOWN
6072 0x0E81, // 0E81..0E82; LAO
6073 0x0E83, // 0E83 ; UNKNOWN
6074 0x0E84, // 0E84 ; LAO
6075 0x0E85, // 0E85 ; UNKNOWN
6076 0x0E86, // 0E86..0E8A; LAO
6077 0x0E8B, // 0E8B ; UNKNOWN
6078 0x0E8C, // 0E8C..0EA3; LAO
6079 0x0EA4, // 0EA4 ; UNKNOWN
6080 0x0EA5, // 0EA5 ; LAO
6081 0x0EA6, // 0EA6 ; UNKNOWN
6082 0x0EA7, // 0EA7..0EBD; LAO
6083 0x0EBE, // 0EBE..0EBF; UNKNOWN
6084 0x0EC0, // 0EC0..0EC4; LAO
6085 0x0EC5, // 0EC5 ; UNKNOWN
6086 0x0EC6, // 0EC6 ; LAO
6087 0x0EC7, // 0EC7 ; UNKNOWN
6088 0x0EC8, // 0EC8..0ECE; LAO
6089 0x0ECF, // 0ECF ; UNKNOWN
6090 0x0ED0, // 0ED0..0ED9; LAO
6091 0x0EDA, // 0EDA..0EDB; UNKNOWN
6092 0x0EDC, // 0EDC..0EDF; LAO
6093 0x0EE0, // 0EE0..0EFF; UNKNOWN
6094 0x0F00, // 0F00..0F47; TIBETAN
6095 0x0F48, // 0F48 ; UNKNOWN
6096 0x0F49, // 0F49..0F6C; TIBETAN
6097 0x0F6D, // 0F6D..0F70; UNKNOWN
6098 0x0F71, // 0F71..0F97; TIBETAN
6099 0x0F98, // 0F98 ; UNKNOWN
6100 0x0F99, // 0F99..0FBC; TIBETAN
6101 0x0FBD, // 0FBD ; UNKNOWN
6102 0x0FBE, // 0FBE..0FCC; TIBETAN
6103 0x0FCD, // 0FCD ; UNKNOWN
6104 0x0FCE, // 0FCE..0FD4; TIBETAN
6105 0x0FD5, // 0FD5..0FD8; COMMON
6106 0x0FD9, // 0FD9..0FDA; TIBETAN
6107 0x0FDB, // 0FDB..0FFF; UNKNOWN
6108 0x1000, // 1000..109F; MYANMAR
6109 0x10A0, // 10A0..10C5; GEORGIAN
6110 0x10C6, // 10C6 ; UNKNOWN
6111 0x10C7, // 10C7 ; GEORGIAN
6112 0x10C8, // 10C8..10CC; UNKNOWN
6113 0x10CD, // 10CD ; GEORGIAN
6114 0x10CE, // 10CE..10CF; UNKNOWN
6115 0x10D0, // 10D0..10FA; GEORGIAN
6116 0x10FB, // 10FB ; COMMON
6117 0x10FC, // 10FC..10FF; GEORGIAN
6118 0x1100, // 1100..11FF; HANGUL
6119 0x1200, // 1200..1248; ETHIOPIC
6120 0x1249, // 1249 ; UNKNOWN
6121 0x124A, // 124A..124D; ETHIOPIC
6122 0x124E, // 124E..124F; UNKNOWN
6123 0x1250, // 1250..1256; ETHIOPIC
6124 0x1257, // 1257 ; UNKNOWN
6125 0x1258, // 1258 ; ETHIOPIC
6126 0x1259, // 1259 ; UNKNOWN
6127 0x125A, // 125A..125D; ETHIOPIC
6128 0x125E, // 125E..125F; UNKNOWN
6129 0x1260, // 1260..1288; ETHIOPIC
6130 0x1289, // 1289 ; UNKNOWN
6131 0x128A, // 128A..128D; ETHIOPIC
6132 0x128E, // 128E..128F; UNKNOWN
6133 0x1290, // 1290..12B0; ETHIOPIC
6134 0x12B1, // 12B1 ; UNKNOWN
6135 0x12B2, // 12B2..12B5; ETHIOPIC
6136 0x12B6, // 12B6..12B7; UNKNOWN
6137 0x12B8, // 12B8..12BE; ETHIOPIC
6138 0x12BF, // 12BF ; UNKNOWN
6139 0x12C0, // 12C0 ; ETHIOPIC
6140 0x12C1, // 12C1 ; UNKNOWN
6141 0x12C2, // 12C2..12C5; ETHIOPIC
6142 0x12C6, // 12C6..12C7; UNKNOWN
6143 0x12C8, // 12C8..12D6; ETHIOPIC
6144 0x12D7, // 12D7 ; UNKNOWN
6145 0x12D8, // 12D8..1310; ETHIOPIC
6146 0x1311, // 1311 ; UNKNOWN
6147 0x1312, // 1312..1315; ETHIOPIC
6148 0x1316, // 1316..1317; UNKNOWN
6149 0x1318, // 1318..135A; ETHIOPIC
6150 0x135B, // 135B..135C; UNKNOWN
6151 0x135D, // 135D..137C; ETHIOPIC
6152 0x137D, // 137D..137F; UNKNOWN
6153 0x1380, // 1380..1399; ETHIOPIC
6154 0x139A, // 139A..139F; UNKNOWN
6155 0x13A0, // 13A0..13F5; CHEROKEE
6156 0x13F6, // 13F6..13F7; UNKNOWN
6157 0x13F8, // 13F8..13FD; CHEROKEE
6158 0x13FE, // 13FE..13FF; UNKNOWN
6159 0x1400, // 1400..167F; CANADIAN_ABORIGINAL
6160 0x1680, // 1680..169C; OGHAM
6161 0x169D, // 169D..169F; UNKNOWN
6162 0x16A0, // 16A0..16EA; RUNIC
6163 0x16EB, // 16EB..16ED; COMMON
6164 0x16EE, // 16EE..16F8; RUNIC
6165 0x16F9, // 16F9..16FF; UNKNOWN
6166 0x1700, // 1700..1715; TAGALOG
6167 0x1716, // 1716..171E; UNKNOWN
6168 0x171F, // 171F ; TAGALOG
6169 0x1720, // 1720..1734; HANUNOO
6170 0x1735, // 1735..1736; COMMON
6171 0x1737, // 1737..173F; UNKNOWN
6172 0x1740, // 1740..1753; BUHID
6173 0x1754, // 1754..175F; UNKNOWN
6174 0x1760, // 1760..176C; TAGBANWA
6175 0x176D, // 176D ; UNKNOWN
6176 0x176E, // 176E..1770; TAGBANWA
6177 0x1771, // 1771 ; UNKNOWN
6178 0x1772, // 1772..1773; TAGBANWA
6179 0x1774, // 1774..177F; UNKNOWN
6180 0x1780, // 1780..17DD; KHMER
6181 0x17DE, // 17DE..17DF; UNKNOWN
6182 0x17E0, // 17E0..17E9; KHMER
6183 0x17EA, // 17EA..17EF; UNKNOWN
6184 0x17F0, // 17F0..17F9; KHMER
6185 0x17FA, // 17FA..17FF; UNKNOWN
6186 0x1800, // 1800..1801; MONGOLIAN
6187 0x1802, // 1802..1803; COMMON
6188 0x1804, // 1804 ; MONGOLIAN
6189 0x1805, // 1805 ; COMMON
6190 0x1806, // 1806..1819; MONGOLIAN
6191 0x181A, // 181A..181F; UNKNOWN
6192 0x1820, // 1820..1878; MONGOLIAN
6193 0x1879, // 1879..187F; UNKNOWN
6194 0x1880, // 1880..18AA; MONGOLIAN
6195 0x18AB, // 18AB..18AF; UNKNOWN
6196 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL
6197 0x18F6, // 18F6..18FF; UNKNOWN
6198 0x1900, // 1900..191E; LIMBU
6199 0x191F, // 191F ; UNKNOWN
6200 0x1920, // 1920..192B; LIMBU
6201 0x192C, // 192C..192F; UNKNOWN
6202 0x1930, // 1930..193B; LIMBU
6203 0x193C, // 193C..193F; UNKNOWN
6204 0x1940, // 1940 ; LIMBU
6205 0x1941, // 1941..1943; UNKNOWN
6206 0x1944, // 1944..194F; LIMBU
6207 0x1950, // 1950..196D; TAI_LE
6208 0x196E, // 196E..196F; UNKNOWN
6209 0x1970, // 1970..1974; TAI_LE
6210 0x1975, // 1975..197F; UNKNOWN
6211 0x1980, // 1980..19AB; NEW_TAI_LUE
6212 0x19AC, // 19AC..19AF; UNKNOWN
6213 0x19B0, // 19B0..19C9; NEW_TAI_LUE
6214 0x19CA, // 19CA..19CF; UNKNOWN
6215 0x19D0, // 19D0..19DA; NEW_TAI_LUE
6216 0x19DB, // 19DB..19DD; UNKNOWN
6217 0x19DE, // 19DE..19DF; NEW_TAI_LUE
6218 0x19E0, // 19E0..19FF; KHMER
6219 0x1A00, // 1A00..1A1B; BUGINESE
6220 0x1A1C, // 1A1C..1A1D; UNKNOWN
6221 0x1A1E, // 1A1E..1A1F; BUGINESE
6222 0x1A20, // 1A20..1A5E; TAI_THAM
6223 0x1A5F, // 1A5F ; UNKNOWN
6224 0x1A60, // 1A60..1A7C; TAI_THAM
6225 0x1A7D, // 1A7D..1A7E; UNKNOWN
6226 0x1A7F, // 1A7F..1A89; TAI_THAM
6227 0x1A8A, // 1A8A..1A8F; UNKNOWN
6228 0x1A90, // 1A90..1A99; TAI_THAM
6229 0x1A9A, // 1A9A..1A9F; UNKNOWN
6230 0x1AA0, // 1AA0..1AAD; TAI_THAM
6231 0x1AAE, // 1AAE..1AAF; UNKNOWN
6232 0x1AB0, // 1AB0..1ADD; INHERITED
6233 0x1ADE, // 1ADE..1ADF; UNKNOWN
6234 0x1AE0, // 1AE0..1AEB; INHERITED
6235 0x1AEC, // 1AEC..1AFF; UNKNOWN
6236 0x1B00, // 1B00..1B4C; BALINESE
6237 0x1B4D, // 1B4D ; UNKNOWN
6238 0x1B4E, // 1B4E..1B7F; BALINESE
6239 0x1B80, // 1B80..1BBF; SUNDANESE
6240 0x1BC0, // 1BC0..1BF3; BATAK
6241 0x1BF4, // 1BF4..1BFB; UNKNOWN
6242 0x1BFC, // 1BFC..1BFF; BATAK
6243 0x1C00, // 1C00..1C37; LEPCHA
6244 0x1C38, // 1C38..1C3A; UNKNOWN
6245 0x1C3B, // 1C3B..1C49; LEPCHA
6246 0x1C4A, // 1C4A..1C4C; UNKNOWN
6247 0x1C4D, // 1C4D..1C4F; LEPCHA
6248 0x1C50, // 1C50..1C7F; OL_CHIKI
6249 0x1C80, // 1C80..1C8A; CYRILLIC
6250 0x1C8B, // 1C8B..1C8F; UNKNOWN
6251 0x1C90, // 1C90..1CBA; GEORGIAN
6252 0x1CBB, // 1CBB..1CBC; UNKNOWN
6253 0x1CBD, // 1CBD..1CBF; GEORGIAN
6254 0x1CC0, // 1CC0..1CC7; SUNDANESE
6255 0x1CC8, // 1CC8..1CCF; UNKNOWN
6256 0x1CD0, // 1CD0..1CD2; INHERITED
6257 0x1CD3, // 1CD3 ; COMMON
6258 0x1CD4, // 1CD4..1CE0; INHERITED
6259 0x1CE1, // 1CE1 ; COMMON
6260 0x1CE2, // 1CE2..1CE8; INHERITED
6261 0x1CE9, // 1CE9..1CEC; COMMON
6262 0x1CED, // 1CED ; INHERITED
6263 0x1CEE, // 1CEE..1CF3; COMMON
6264 0x1CF4, // 1CF4 ; INHERITED
6265 0x1CF5, // 1CF5..1CF7; COMMON
6266 0x1CF8, // 1CF8..1CF9; INHERITED
6267 0x1CFA, // 1CFA ; COMMON
6268 0x1CFB, // 1CFB..1CFF; UNKNOWN
6269 0x1D00, // 1D00..1D25; LATIN
6270 0x1D26, // 1D26..1D2A; GREEK
6271 0x1D2B, // 1D2B ; CYRILLIC
6272 0x1D2C, // 1D2C..1D5C; LATIN
6273 0x1D5D, // 1D5D..1D61; GREEK
6274 0x1D62, // 1D62..1D65; LATIN
6275 0x1D66, // 1D66..1D6A; GREEK
6276 0x1D6B, // 1D6B..1D77; LATIN
6277 0x1D78, // 1D78 ; CYRILLIC
6278 0x1D79, // 1D79..1DBE; LATIN
6279 0x1DBF, // 1DBF ; GREEK
6280 0x1DC0, // 1DC0..1DFF; INHERITED
6281 0x1E00, // 1E00..1EFF; LATIN
6282 0x1F00, // 1F00..1F15; GREEK
6283 0x1F16, // 1F16..1F17; UNKNOWN
6284 0x1F18, // 1F18..1F1D; GREEK
6285 0x1F1E, // 1F1E..1F1F; UNKNOWN
6286 0x1F20, // 1F20..1F45; GREEK
6287 0x1F46, // 1F46..1F47; UNKNOWN
6288 0x1F48, // 1F48..1F4D; GREEK
6289 0x1F4E, // 1F4E..1F4F; UNKNOWN
6290 0x1F50, // 1F50..1F57; GREEK
6291 0x1F58, // 1F58 ; UNKNOWN
6292 0x1F59, // 1F59 ; GREEK
6293 0x1F5A, // 1F5A ; UNKNOWN
6294 0x1F5B, // 1F5B ; GREEK
6295 0x1F5C, // 1F5C ; UNKNOWN
6296 0x1F5D, // 1F5D ; GREEK
6297 0x1F5E, // 1F5E ; UNKNOWN
6298 0x1F5F, // 1F5F..1F7D; GREEK
6299 0x1F7E, // 1F7E..1F7F; UNKNOWN
6300 0x1F80, // 1F80..1FB4; GREEK
6301 0x1FB5, // 1FB5 ; UNKNOWN
6302 0x1FB6, // 1FB6..1FC4; GREEK
6303 0x1FC5, // 1FC5 ; UNKNOWN
6304 0x1FC6, // 1FC6..1FD3; GREEK
6305 0x1FD4, // 1FD4..1FD5; UNKNOWN
6306 0x1FD6, // 1FD6..1FDB; GREEK
6307 0x1FDC, // 1FDC ; UNKNOWN
6308 0x1FDD, // 1FDD..1FEF; GREEK
6309 0x1FF0, // 1FF0..1FF1; UNKNOWN
6310 0x1FF2, // 1FF2..1FF4; GREEK
6311 0x1FF5, // 1FF5 ; UNKNOWN
6312 0x1FF6, // 1FF6..1FFE; GREEK
6313 0x1FFF, // 1FFF ; UNKNOWN
6314 0x2000, // 2000..200B; COMMON
6315 0x200C, // 200C..200D; INHERITED
6316 0x200E, // 200E..2064; COMMON
6317 0x2065, // 2065 ; UNKNOWN
6318 0x2066, // 2066..2070; COMMON
6319 0x2071, // 2071 ; LATIN
6320 0x2072, // 2072..2073; UNKNOWN
6321 0x2074, // 2074..207E; COMMON
6322 0x207F, // 207F ; LATIN
6323 0x2080, // 2080..208E; COMMON
6324 0x208F, // 208F ; UNKNOWN
6325 0x2090, // 2090..209C; LATIN
6326 0x209D, // 209D..209F; UNKNOWN
6327 0x20A0, // 20A0..20C1; COMMON
6328 0x20C2, // 20C2..20CF; UNKNOWN
6329 0x20D0, // 20D0..20F0; INHERITED
6330 0x20F1, // 20F1..20FF; UNKNOWN
6331 0x2100, // 2100..2125; COMMON
6332 0x2126, // 2126 ; GREEK
6333 0x2127, // 2127..2129; COMMON
6334 0x212A, // 212A..212B; LATIN
6335 0x212C, // 212C..2131; COMMON
6336 0x2132, // 2132 ; LATIN
6337 0x2133, // 2133..214D; COMMON
6338 0x214E, // 214E ; LATIN
6339 0x214F, // 214F..215F; COMMON
6340 0x2160, // 2160..2188; LATIN
6341 0x2189, // 2189..218B; COMMON
6342 0x218C, // 218C..218F; UNKNOWN
6343 0x2190, // 2190..2429; COMMON
6344 0x242A, // 242A..243F; UNKNOWN
6345 0x2440, // 2440..244A; COMMON
6346 0x244B, // 244B..245F; UNKNOWN
6347 0x2460, // 2460..27FF; COMMON
6348 0x2800, // 2800..28FF; BRAILLE
6349 0x2900, // 2900..2B73; COMMON
6350 0x2B74, // 2B74..2B75; UNKNOWN
6351 0x2B76, // 2B76..2BFF; COMMON
6352 0x2C00, // 2C00..2C5F; GLAGOLITIC
6353 0x2C60, // 2C60..2C7F; LATIN
6354 0x2C80, // 2C80..2CF3; COPTIC
6355 0x2CF4, // 2CF4..2CF8; UNKNOWN
6356 0x2CF9, // 2CF9..2CFF; COPTIC
6357 0x2D00, // 2D00..2D25; GEORGIAN
6358 0x2D26, // 2D26 ; UNKNOWN
6359 0x2D27, // 2D27 ; GEORGIAN
6360 0x2D28, // 2D28..2D2C; UNKNOWN
6361 0x2D2D, // 2D2D ; GEORGIAN
6362 0x2D2E, // 2D2E..2D2F; UNKNOWN
6363 0x2D30, // 2D30..2D67; TIFINAGH
6364 0x2D68, // 2D68..2D6E; UNKNOWN
6365 0x2D6F, // 2D6F..2D70; TIFINAGH
6366 0x2D71, // 2D71..2D7E; UNKNOWN
6367 0x2D7F, // 2D7F ; TIFINAGH
6368 0x2D80, // 2D80..2D96; ETHIOPIC
6369 0x2D97, // 2D97..2D9F; UNKNOWN
6370 0x2DA0, // 2DA0..2DA6; ETHIOPIC
6371 0x2DA7, // 2DA7 ; UNKNOWN
6372 0x2DA8, // 2DA8..2DAE; ETHIOPIC
6373 0x2DAF, // 2DAF ; UNKNOWN
6374 0x2DB0, // 2DB0..2DB6; ETHIOPIC
6375 0x2DB7, // 2DB7 ; UNKNOWN
6376 0x2DB8, // 2DB8..2DBE; ETHIOPIC
6377 0x2DBF, // 2DBF ; UNKNOWN
6378 0x2DC0, // 2DC0..2DC6; ETHIOPIC
6379 0x2DC7, // 2DC7 ; UNKNOWN
6380 0x2DC8, // 2DC8..2DCE; ETHIOPIC
6381 0x2DCF, // 2DCF ; UNKNOWN
6382 0x2DD0, // 2DD0..2DD6; ETHIOPIC
6383 0x2DD7, // 2DD7 ; UNKNOWN
6384 0x2DD8, // 2DD8..2DDE; ETHIOPIC
6385 0x2DDF, // 2DDF ; UNKNOWN
6386 0x2DE0, // 2DE0..2DFF; CYRILLIC
6387 0x2E00, // 2E00..2E5D; COMMON
6388 0x2E5E, // 2E5E..2E7F; UNKNOWN
6389 0x2E80, // 2E80..2E99; HAN
6390 0x2E9A, // 2E9A ; UNKNOWN
6391 0x2E9B, // 2E9B..2EF3; HAN
6392 0x2EF4, // 2EF4..2EFF; UNKNOWN
6393 0x2F00, // 2F00..2FD5; HAN
6394 0x2FD6, // 2FD6..2FEF; UNKNOWN
6395 0x2FF0, // 2FF0..3004; COMMON
6396 0x3005, // 3005 ; HAN
6397 0x3006, // 3006 ; COMMON
6398 0x3007, // 3007 ; HAN
6399 0x3008, // 3008..3020; COMMON
6400 0x3021, // 3021..3029; HAN
6401 0x302A, // 302A..302D; INHERITED
6402 0x302E, // 302E..302F; HANGUL
6403 0x3030, // 3030..3037; COMMON
6404 0x3038, // 3038..303B; HAN
6405 0x303C, // 303C..303F; COMMON
6406 0x3040, // 3040 ; UNKNOWN
6407 0x3041, // 3041..3096; HIRAGANA
6408 0x3097, // 3097..3098; UNKNOWN
6409 0x3099, // 3099..309A; INHERITED
6410 0x309B, // 309B..309C; COMMON
6411 0x309D, // 309D..309F; HIRAGANA
6412 0x30A0, // 30A0 ; COMMON
6413 0x30A1, // 30A1..30FA; KATAKANA
6414 0x30FB, // 30FB..30FC; COMMON
6415 0x30FD, // 30FD..30FF; KATAKANA
6416 0x3100, // 3100..3104; UNKNOWN
6417 0x3105, // 3105..312F; BOPOMOFO
6418 0x3130, // 3130 ; UNKNOWN
6419 0x3131, // 3131..318E; HANGUL
6420 0x318F, // 318F ; UNKNOWN
6421 0x3190, // 3190..319F; COMMON
6422 0x31A0, // 31A0..31BF; BOPOMOFO
6423 0x31C0, // 31C0..31E5; COMMON
6424 0x31E6, // 31E6..31EE; UNKNOWN
6425 0x31EF, // 31EF ; COMMON
6426 0x31F0, // 31F0..31FF; KATAKANA
6427 0x3200, // 3200..321E; HANGUL
6428 0x321F, // 321F ; UNKNOWN
6429 0x3220, // 3220..325F; COMMON
6430 0x3260, // 3260..327E; HANGUL
6431 0x327F, // 327F..32CF; COMMON
6432 0x32D0, // 32D0..32FE; KATAKANA
6433 0x32FF, // 32FF ; COMMON
6434 0x3300, // 3300..3357; KATAKANA
6435 0x3358, // 3358..33FF; COMMON
6436 0x3400, // 3400..4DBF; HAN
6437 0x4DC0, // 4DC0..4DFF; COMMON
6438 0x4E00, // 4E00..9FFF; HAN
6439 0xA000, // A000..A48C; YI
6440 0xA48D, // A48D..A48F; UNKNOWN
6441 0xA490, // A490..A4C6; YI
6442 0xA4C7, // A4C7..A4CF; UNKNOWN
6443 0xA4D0, // A4D0..A4FF; LISU
6444 0xA500, // A500..A62B; VAI
6445 0xA62C, // A62C..A63F; UNKNOWN
6446 0xA640, // A640..A69F; CYRILLIC
6447 0xA6A0, // A6A0..A6F7; BAMUM
6448 0xA6F8, // A6F8..A6FF; UNKNOWN
6449 0xA700, // A700..A721; COMMON
6450 0xA722, // A722..A787; LATIN
6451 0xA788, // A788..A78A; COMMON
6452 0xA78B, // A78B..A7DC; LATIN
6453 0xA7DD, // A7DD..A7F0; UNKNOWN
6454 0xA7F1, // A7F1..A7FF; LATIN
6455 0xA800, // A800..A82C; SYLOTI_NAGRI
6456 0xA82D, // A82D..A82F; UNKNOWN
6457 0xA830, // A830..A839; COMMON
6458 0xA83A, // A83A..A83F; UNKNOWN
6459 0xA840, // A840..A877; PHAGS_PA
6460 0xA878, // A878..A87F; UNKNOWN
6461 0xA880, // A880..A8C5; SAURASHTRA
6462 0xA8C6, // A8C6..A8CD; UNKNOWN
6463 0xA8CE, // A8CE..A8D9; SAURASHTRA
6464 0xA8DA, // A8DA..A8DF; UNKNOWN
6465 0xA8E0, // A8E0..A8FF; DEVANAGARI
6466 0xA900, // A900..A92D; KAYAH_LI
6467 0xA92E, // A92E ; COMMON
6468 0xA92F, // A92F ; KAYAH_LI
6469 0xA930, // A930..A953; REJANG
6470 0xA954, // A954..A95E; UNKNOWN
6471 0xA95F, // A95F ; REJANG
6472 0xA960, // A960..A97C; HANGUL
6473 0xA97D, // A97D..A97F; UNKNOWN
6474 0xA980, // A980..A9CD; JAVANESE
6475 0xA9CE, // A9CE ; UNKNOWN
6476 0xA9CF, // A9CF ; COMMON
6477 0xA9D0, // A9D0..A9D9; JAVANESE
6478 0xA9DA, // A9DA..A9DD; UNKNOWN
6479 0xA9DE, // A9DE..A9DF; JAVANESE
6480 0xA9E0, // A9E0..A9FE; MYANMAR
6481 0xA9FF, // A9FF ; UNKNOWN
6482 0xAA00, // AA00..AA36; CHAM
6483 0xAA37, // AA37..AA3F; UNKNOWN
6484 0xAA40, // AA40..AA4D; CHAM
6485 0xAA4E, // AA4E..AA4F; UNKNOWN
6486 0xAA50, // AA50..AA59; CHAM
6487 0xAA5A, // AA5A..AA5B; UNKNOWN
6488 0xAA5C, // AA5C..AA5F; CHAM
6489 0xAA60, // AA60..AA7F; MYANMAR
6490 0xAA80, // AA80..AAC2; TAI_VIET
6491 0xAAC3, // AAC3..AADA; UNKNOWN
6492 0xAADB, // AADB..AADF; TAI_VIET
6493 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK
6494 0xAAF7, // AAF7..AB00; UNKNOWN
6495 0xAB01, // AB01..AB06; ETHIOPIC
6496 0xAB07, // AB07..AB08; UNKNOWN
6497 0xAB09, // AB09..AB0E; ETHIOPIC
6498 0xAB0F, // AB0F..AB10; UNKNOWN
6499 0xAB11, // AB11..AB16; ETHIOPIC
6500 0xAB17, // AB17..AB1F; UNKNOWN
6501 0xAB20, // AB20..AB26; ETHIOPIC
6502 0xAB27, // AB27 ; UNKNOWN
6503 0xAB28, // AB28..AB2E; ETHIOPIC
6504 0xAB2F, // AB2F ; UNKNOWN
6505 0xAB30, // AB30..AB5A; LATIN
6506 0xAB5B, // AB5B ; COMMON
6507 0xAB5C, // AB5C..AB64; LATIN
6508 0xAB65, // AB65 ; GREEK
6509 0xAB66, // AB66..AB69; LATIN
6510 0xAB6A, // AB6A..AB6B; COMMON
6511 0xAB6C, // AB6C..AB6F; UNKNOWN
6512 0xAB70, // AB70..ABBF; CHEROKEE
6513 0xABC0, // ABC0..ABED; MEETEI_MAYEK
6514 0xABEE, // ABEE..ABEF; UNKNOWN
6515 0xABF0, // ABF0..ABF9; MEETEI_MAYEK
6516 0xABFA, // ABFA..ABFF; UNKNOWN
6517 0xAC00, // AC00..D7A3; HANGUL
6518 0xD7A4, // D7A4..D7AF; UNKNOWN
6519 0xD7B0, // D7B0..D7C6; HANGUL
6520 0xD7C7, // D7C7..D7CA; UNKNOWN
6521 0xD7CB, // D7CB..D7FB; HANGUL
6522 0xD7FC, // D7FC..F8FF; UNKNOWN
6523 0xF900, // F900..FA6D; HAN
6524 0xFA6E, // FA6E..FA6F; UNKNOWN
6525 0xFA70, // FA70..FAD9; HAN
6526 0xFADA, // FADA..FAFF; UNKNOWN
6527 0xFB00, // FB00..FB06; LATIN
6528 0xFB07, // FB07..FB12; UNKNOWN
6529 0xFB13, // FB13..FB17; ARMENIAN
6530 0xFB18, // FB18..FB1C; UNKNOWN
6531 0xFB1D, // FB1D..FB36; HEBREW
6532 0xFB37, // FB37 ; UNKNOWN
6533 0xFB38, // FB38..FB3C; HEBREW
6534 0xFB3D, // FB3D ; UNKNOWN
6535 0xFB3E, // FB3E ; HEBREW
6536 0xFB3F, // FB3F ; UNKNOWN
6537 0xFB40, // FB40..FB41; HEBREW
6538 0xFB42, // FB42 ; UNKNOWN
6539 0xFB43, // FB43..FB44; HEBREW
6540 0xFB45, // FB45 ; UNKNOWN
6541 0xFB46, // FB46..FB4F; HEBREW
6542 0xFB50, // FB50..FD3D; ARABIC
6543 0xFD3E, // FD3E..FD3F; COMMON
6544 0xFD40, // FD40..FDCF; ARABIC
6545 0xFDD0, // FDD0..FDEF; UNKNOWN
6546 0xFDF0, // FDF0..FDFF; ARABIC
6547 0xFE00, // FE00..FE0F; INHERITED
6548 0xFE10, // FE10..FE19; COMMON
6549 0xFE1A, // FE1A..FE1F; UNKNOWN
6550 0xFE20, // FE20..FE2D; INHERITED
6551 0xFE2E, // FE2E..FE2F; CYRILLIC
6552 0xFE30, // FE30..FE52; COMMON
6553 0xFE53, // FE53 ; UNKNOWN
6554 0xFE54, // FE54..FE66; COMMON
6555 0xFE67, // FE67 ; UNKNOWN
6556 0xFE68, // FE68..FE6B; COMMON
6557 0xFE6C, // FE6C..FE6F; UNKNOWN
6558 0xFE70, // FE70..FE74; ARABIC
6559 0xFE75, // FE75 ; UNKNOWN
6560 0xFE76, // FE76..FEFC; ARABIC
6561 0xFEFD, // FEFD..FEFE; UNKNOWN
6562 0xFEFF, // FEFF ; COMMON
6563 0xFF00, // FF00 ; UNKNOWN
6564 0xFF01, // FF01..FF20; COMMON
6565 0xFF21, // FF21..FF3A; LATIN
6566 0xFF3B, // FF3B..FF40; COMMON
6567 0xFF41, // FF41..FF5A; LATIN
6568 0xFF5B, // FF5B..FF65; COMMON
6569 0xFF66, // FF66..FF6F; KATAKANA
6570 0xFF70, // FF70 ; COMMON
6571 0xFF71, // FF71..FF9D; KATAKANA
6572 0xFF9E, // FF9E..FF9F; COMMON
6573 0xFFA0, // FFA0..FFBE; HANGUL
6574 0xFFBF, // FFBF..FFC1; UNKNOWN
6575 0xFFC2, // FFC2..FFC7; HANGUL
6576 0xFFC8, // FFC8..FFC9; UNKNOWN
6577 0xFFCA, // FFCA..FFCF; HANGUL
6578 0xFFD0, // FFD0..FFD1; UNKNOWN
6579 0xFFD2, // FFD2..FFD7; HANGUL
6580 0xFFD8, // FFD8..FFD9; UNKNOWN
6581 0xFFDA, // FFDA..FFDC; HANGUL
6582 0xFFDD, // FFDD..FFDF; UNKNOWN
6583 0xFFE0, // FFE0..FFE6; COMMON
6584 0xFFE7, // FFE7 ; UNKNOWN
6585 0xFFE8, // FFE8..FFEE; COMMON
6586 0xFFEF, // FFEF..FFF8; UNKNOWN
6587 0xFFF9, // FFF9..FFFD; COMMON
6588 0xFFFE, // FFFE..FFFF; UNKNOWN
6589 0x10000, // 10000..1000B; LINEAR_B
6590 0x1000C, // 1000C ; UNKNOWN
6591 0x1000D, // 1000D..10026; LINEAR_B
6592 0x10027, // 10027 ; UNKNOWN
6593 0x10028, // 10028..1003A; LINEAR_B
6594 0x1003B, // 1003B ; UNKNOWN
6595 0x1003C, // 1003C..1003D; LINEAR_B
6596 0x1003E, // 1003E ; UNKNOWN
6597 0x1003F, // 1003F..1004D; LINEAR_B
6598 0x1004E, // 1004E..1004F; UNKNOWN
6599 0x10050, // 10050..1005D; LINEAR_B
6600 0x1005E, // 1005E..1007F; UNKNOWN
6601 0x10080, // 10080..100FA; LINEAR_B
6602 0x100FB, // 100FB..100FF; UNKNOWN
6603 0x10100, // 10100..10102; COMMON
6604 0x10103, // 10103..10106; UNKNOWN
6605 0x10107, // 10107..10133; COMMON
6606 0x10134, // 10134..10136; UNKNOWN
6607 0x10137, // 10137..1013F; COMMON
6608 0x10140, // 10140..1018E; GREEK
6609 0x1018F, // 1018F ; UNKNOWN
6610 0x10190, // 10190..1019C; COMMON
6611 0x1019D, // 1019D..1019F; UNKNOWN
6612 0x101A0, // 101A0 ; GREEK
6613 0x101A1, // 101A1..101CF; UNKNOWN
6614 0x101D0, // 101D0..101FC; COMMON
6615 0x101FD, // 101FD ; INHERITED
6616 0x101FE, // 101FE..1027F; UNKNOWN
6617 0x10280, // 10280..1029C; LYCIAN
6618 0x1029D, // 1029D..1029F; UNKNOWN
6619 0x102A0, // 102A0..102D0; CARIAN
6620 0x102D1, // 102D1..102DF; UNKNOWN
6621 0x102E0, // 102E0 ; INHERITED
6622 0x102E1, // 102E1..102FB; COMMON
6623 0x102FC, // 102FC..102FF; UNKNOWN
6624 0x10300, // 10300..10323; OLD_ITALIC
6625 0x10324, // 10324..1032C; UNKNOWN
6626 0x1032D, // 1032D..1032F; OLD_ITALIC
6627 0x10330, // 10330..1034A; GOTHIC
6628 0x1034B, // 1034B..1034F; UNKNOWN
6629 0x10350, // 10350..1037A; OLD_PERMIC
6630 0x1037B, // 1037B..1037F; UNKNOWN
6631 0x10380, // 10380..1039D; UGARITIC
6632 0x1039E, // 1039E ; UNKNOWN
6633 0x1039F, // 1039F ; UGARITIC
6634 0x103A0, // 103A0..103C3; OLD_PERSIAN
6635 0x103C4, // 103C4..103C7; UNKNOWN
6636 0x103C8, // 103C8..103D5; OLD_PERSIAN
6637 0x103D6, // 103D6..103FF; UNKNOWN
6638 0x10400, // 10400..1044F; DESERET
6639 0x10450, // 10450..1047F; SHAVIAN
6640 0x10480, // 10480..1049D; OSMANYA
6641 0x1049E, // 1049E..1049F; UNKNOWN
6642 0x104A0, // 104A0..104A9; OSMANYA
6643 0x104AA, // 104AA..104AF; UNKNOWN
6644 0x104B0, // 104B0..104D3; OSAGE
6645 0x104D4, // 104D4..104D7; UNKNOWN
6646 0x104D8, // 104D8..104FB; OSAGE
6647 0x104FC, // 104FC..104FF; UNKNOWN
6648 0x10500, // 10500..10527; ELBASAN
6649 0x10528, // 10528..1052F; UNKNOWN
6650 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN
6651 0x10564, // 10564..1056E; UNKNOWN
6652 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN
6653 0x10570, // 10570..1057A; VITHKUQI
6654 0x1057B, // 1057B ; UNKNOWN
6655 0x1057C, // 1057C..1058A; VITHKUQI
6656 0x1058B, // 1058B ; UNKNOWN
6657 0x1058C, // 1058C..10592; VITHKUQI
6658 0x10593, // 10593 ; UNKNOWN
6659 0x10594, // 10594..10595; VITHKUQI
6660 0x10596, // 10596 ; UNKNOWN
6661 0x10597, // 10597..105A1; VITHKUQI
6662 0x105A2, // 105A2 ; UNKNOWN
6663 0x105A3, // 105A3..105B1; VITHKUQI
6664 0x105B2, // 105B2 ; UNKNOWN
6665 0x105B3, // 105B3..105B9; VITHKUQI
6666 0x105BA, // 105BA ; UNKNOWN
6667 0x105BB, // 105BB..105BC; VITHKUQI
6668 0x105BD, // 105BD..105BF; UNKNOWN
6669 0x105C0, // 105C0..105F3; TODHRI
6670 0x105F4, // 105F4..105FF; UNKNOWN
6671 0x10600, // 10600..10736; LINEAR_A
6672 0x10737, // 10737..1073F; UNKNOWN
6673 0x10740, // 10740..10755; LINEAR_A
6674 0x10756, // 10756..1075F; UNKNOWN
6675 0x10760, // 10760..10767; LINEAR_A
6676 0x10768, // 10768..1077F; UNKNOWN
6677 0x10780, // 10780..10785; LATIN
6678 0x10786, // 10786 ; UNKNOWN
6679 0x10787, // 10787..107B0; LATIN
6680 0x107B1, // 107B1 ; UNKNOWN
6681 0x107B2, // 107B2..107BA; LATIN
6682 0x107BB, // 107BB..107FF; UNKNOWN
6683 0x10800, // 10800..10805; CYPRIOT
6684 0x10806, // 10806..10807; UNKNOWN
6685 0x10808, // 10808 ; CYPRIOT
6686 0x10809, // 10809 ; UNKNOWN
6687 0x1080A, // 1080A..10835; CYPRIOT
6688 0x10836, // 10836 ; UNKNOWN
6689 0x10837, // 10837..10838; CYPRIOT
6690 0x10839, // 10839..1083B; UNKNOWN
6691 0x1083C, // 1083C ; CYPRIOT
6692 0x1083D, // 1083D..1083E; UNKNOWN
6693 0x1083F, // 1083F ; CYPRIOT
6694 0x10840, // 10840..10855; IMPERIAL_ARAMAIC
6695 0x10856, // 10856 ; UNKNOWN
6696 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC
6697 0x10860, // 10860..1087F; PALMYRENE
6698 0x10880, // 10880..1089E; NABATAEAN
6699 0x1089F, // 1089F..108A6; UNKNOWN
6700 0x108A7, // 108A7..108AF; NABATAEAN
6701 0x108B0, // 108B0..108DF; UNKNOWN
6702 0x108E0, // 108E0..108F2; HATRAN
6703 0x108F3, // 108F3 ; UNKNOWN
6704 0x108F4, // 108F4..108F5; HATRAN
6705 0x108F6, // 108F6..108FA; UNKNOWN
6706 0x108FB, // 108FB..108FF; HATRAN
6707 0x10900, // 10900..1091B; PHOENICIAN
6708 0x1091C, // 1091C..1091E; UNKNOWN
6709 0x1091F, // 1091F ; PHOENICIAN
6710 0x10920, // 10920..10939; LYDIAN
6711 0x1093A, // 1093A..1093E; UNKNOWN
6712 0x1093F, // 1093F ; LYDIAN
6713 0x10940, // 10940..10959; SIDETIC
6714 0x1095A, // 1095A..1097F; UNKNOWN
6715 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS
6716 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE
6717 0x109B8, // 109B8..109BB; UNKNOWN
6718 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE
6719 0x109D0, // 109D0..109D1; UNKNOWN
6720 0x109D2, // 109D2..109FF; MEROITIC_CURSIVE
6721 0x10A00, // 10A00..10A03; KHAROSHTHI
6722 0x10A04, // 10A04 ; UNKNOWN
6723 0x10A05, // 10A05..10A06; KHAROSHTHI
6724 0x10A07, // 10A07..10A0B; UNKNOWN
6725 0x10A0C, // 10A0C..10A13; KHAROSHTHI
6726 0x10A14, // 10A14 ; UNKNOWN
6727 0x10A15, // 10A15..10A17; KHAROSHTHI
6728 0x10A18, // 10A18 ; UNKNOWN
6729 0x10A19, // 10A19..10A35; KHAROSHTHI
6730 0x10A36, // 10A36..10A37; UNKNOWN
6731 0x10A38, // 10A38..10A3A; KHAROSHTHI
6732 0x10A3B, // 10A3B..10A3E; UNKNOWN
6733 0x10A3F, // 10A3F..10A48; KHAROSHTHI
6734 0x10A49, // 10A49..10A4F; UNKNOWN
6735 0x10A50, // 10A50..10A58; KHAROSHTHI
6736 0x10A59, // 10A59..10A5F; UNKNOWN
6737 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN
6738 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN
6739 0x10AA0, // 10AA0..10ABF; UNKNOWN
6740 0x10AC0, // 10AC0..10AE6; MANICHAEAN
6741 0x10AE7, // 10AE7..10AEA; UNKNOWN
6742 0x10AEB, // 10AEB..10AF6; MANICHAEAN
6743 0x10AF7, // 10AF7..10AFF; UNKNOWN
6744 0x10B00, // 10B00..10B35; AVESTAN
6745 0x10B36, // 10B36..10B38; UNKNOWN
6746 0x10B39, // 10B39..10B3F; AVESTAN
6747 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN
6748 0x10B56, // 10B56..10B57; UNKNOWN
6749 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN
6750 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI
6751 0x10B73, // 10B73..10B77; UNKNOWN
6752 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI
6753 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI
6754 0x10B92, // 10B92..10B98; UNKNOWN
6755 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI
6756 0x10B9D, // 10B9D..10BA8; UNKNOWN
6757 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI
6758 0x10BB0, // 10BB0..10BFF; UNKNOWN
6759 0x10C00, // 10C00..10C48; OLD_TURKIC
6760 0x10C49, // 10C49..10C7F; UNKNOWN
6761 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN
6762 0x10CB3, // 10CB3..10CBF; UNKNOWN
6763 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN
6764 0x10CF3, // 10CF3..10CF9; UNKNOWN
6765 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN
6766 0x10D00, // 10D00..10D27; HANIFI_ROHINGYA
6767 0x10D28, // 10D28..10D2F; UNKNOWN
6768 0x10D30, // 10D30..10D39; HANIFI_ROHINGYA
6769 0x10D3A, // 10D3A..10D3F; UNKNOWN
6770 0x10D40, // 10D40..10D65; GARAY
6771 0x10D66, // 10D66..10D68; UNKNOWN
6772 0x10D69, // 10D69..10D85; GARAY
6773 0x10D86, // 10D86..10D8D; UNKNOWN
6774 0x10D8E, // 10D8E..10D8F; GARAY
6775 0x10D90, // 10D90..10E5F; UNKNOWN
6776 0x10E60, // 10E60..10E7E; ARABIC
6777 0x10E7F, // 10E7F ; UNKNOWN
6778 0x10E80, // 10E80..10EA9; YEZIDI
6779 0x10EAA, // 10EAA ; UNKNOWN
6780 0x10EAB, // 10EAB..10EAD; YEZIDI
6781 0x10EAE, // 10EAE..10EAF; UNKNOWN
6782 0x10EB0, // 10EB0..10EB1; YEZIDI
6783 0x10EB2, // 10EB2..10EC1; UNKNOWN
6784 0x10EC2, // 10EC2..10EC7; ARABIC
6785 0x10EC8, // 10EC8..10ECF; UNKNOWN
6786 0x10ED0, // 10ED0..10ED8; ARABIC
6787 0x10ED9, // 10ED9..10EF9; UNKNOWN
6788 0x10EFA, // 10EFA..10EFF; ARABIC
6789 0x10F00, // 10F00..10F27; OLD_SOGDIAN
6790 0x10F28, // 10F28..10F2F; UNKNOWN
6791 0x10F30, // 10F30..10F59; SOGDIAN
6792 0x10F5A, // 10F5A..10F6F; UNKNOWN
6793 0x10F70, // 10F70..10F89; OLD_UYGHUR
6794 0x10F8A, // 10F8A..10FAF; UNKNOWN
6795 0x10FB0, // 10FB0..10FCB; CHORASMIAN
6796 0x10FCC, // 10FCC..10FDF; UNKNOWN
6797 0x10FE0, // 10FE0..10FF6; ELYMAIC
6798 0x10FF7, // 10FF7..10FFF; UNKNOWN
6799 0x11000, // 11000..1104D; BRAHMI
6800 0x1104E, // 1104E..11051; UNKNOWN
6801 0x11052, // 11052..11075; BRAHMI
6802 0x11076, // 11076..1107E; UNKNOWN
6803 0x1107F, // 1107F ; BRAHMI
6804 0x11080, // 11080..110C2; KAITHI
6805 0x110C3, // 110C3..110CC; UNKNOWN
6806 0x110CD, // 110CD ; KAITHI
6807 0x110CE, // 110CE..110CF; UNKNOWN
6808 0x110D0, // 110D0..110E8; SORA_SOMPENG
6809 0x110E9, // 110E9..110EF; UNKNOWN
6810 0x110F0, // 110F0..110F9; SORA_SOMPENG
6811 0x110FA, // 110FA..110FF; UNKNOWN
6812 0x11100, // 11100..11134; CHAKMA
6813 0x11135, // 11135 ; UNKNOWN
6814 0x11136, // 11136..11147; CHAKMA
6815 0x11148, // 11148..1114F; UNKNOWN
6816 0x11150, // 11150..11176; MAHAJANI
6817 0x11177, // 11177..1117F; UNKNOWN
6818 0x11180, // 11180..111DF; SHARADA
6819 0x111E0, // 111E0 ; UNKNOWN
6820 0x111E1, // 111E1..111F4; SINHALA
6821 0x111F5, // 111F5..111FF; UNKNOWN
6822 0x11200, // 11200..11211; KHOJKI
6823 0x11212, // 11212 ; UNKNOWN
6824 0x11213, // 11213..11241; KHOJKI
6825 0x11242, // 11242..1127F; UNKNOWN
6826 0x11280, // 11280..11286; MULTANI
6827 0x11287, // 11287 ; UNKNOWN
6828 0x11288, // 11288 ; MULTANI
6829 0x11289, // 11289 ; UNKNOWN
6830 0x1128A, // 1128A..1128D; MULTANI
6831 0x1128E, // 1128E ; UNKNOWN
6832 0x1128F, // 1128F..1129D; MULTANI
6833 0x1129E, // 1129E ; UNKNOWN
6834 0x1129F, // 1129F..112A9; MULTANI
6835 0x112AA, // 112AA..112AF; UNKNOWN
6836 0x112B0, // 112B0..112EA; KHUDAWADI
6837 0x112EB, // 112EB..112EF; UNKNOWN
6838 0x112F0, // 112F0..112F9; KHUDAWADI
6839 0x112FA, // 112FA..112FF; UNKNOWN
6840 0x11300, // 11300..11303; GRANTHA
6841 0x11304, // 11304 ; UNKNOWN
6842 0x11305, // 11305..1130C; GRANTHA
6843 0x1130D, // 1130D..1130E; UNKNOWN
6844 0x1130F, // 1130F..11310; GRANTHA
6845 0x11311, // 11311..11312; UNKNOWN
6846 0x11313, // 11313..11328; GRANTHA
6847 0x11329, // 11329 ; UNKNOWN
6848 0x1132A, // 1132A..11330; GRANTHA
6849 0x11331, // 11331 ; UNKNOWN
6850 0x11332, // 11332..11333; GRANTHA
6851 0x11334, // 11334 ; UNKNOWN
6852 0x11335, // 11335..11339; GRANTHA
6853 0x1133A, // 1133A ; UNKNOWN
6854 0x1133B, // 1133B ; INHERITED
6855 0x1133C, // 1133C..11344; GRANTHA
6856 0x11345, // 11345..11346; UNKNOWN
6857 0x11347, // 11347..11348; GRANTHA
6858 0x11349, // 11349..1134A; UNKNOWN
6859 0x1134B, // 1134B..1134D; GRANTHA
6860 0x1134E, // 1134E..1134F; UNKNOWN
6861 0x11350, // 11350 ; GRANTHA
6862 0x11351, // 11351..11356; UNKNOWN
6863 0x11357, // 11357 ; GRANTHA
6864 0x11358, // 11358..1135C; UNKNOWN
6865 0x1135D, // 1135D..11363; GRANTHA
6866 0x11364, // 11364..11365; UNKNOWN
6867 0x11366, // 11366..1136C; GRANTHA
6868 0x1136D, // 1136D..1136F; UNKNOWN
6869 0x11370, // 11370..11374; GRANTHA
6870 0x11375, // 11375..1137F; UNKNOWN
6871 0x11380, // 11380..11389; TULU_TIGALARI
6872 0x1138A, // 1138A ; UNKNOWN
6873 0x1138B, // 1138B ; TULU_TIGALARI
6874 0x1138C, // 1138C..1138D; UNKNOWN
6875 0x1138E, // 1138E ; TULU_TIGALARI
6876 0x1138F, // 1138F ; UNKNOWN
6877 0x11390, // 11390..113B5; TULU_TIGALARI
6878 0x113B6, // 113B6 ; UNKNOWN
6879 0x113B7, // 113B7..113C0; TULU_TIGALARI
6880 0x113C1, // 113C1 ; UNKNOWN
6881 0x113C2, // 113C2 ; TULU_TIGALARI
6882 0x113C3, // 113C3..113C4; UNKNOWN
6883 0x113C5, // 113C5 ; TULU_TIGALARI
6884 0x113C6, // 113C6 ; UNKNOWN
6885 0x113C7, // 113C7..113CA; TULU_TIGALARI
6886 0x113CB, // 113CB ; UNKNOWN
6887 0x113CC, // 113CC..113D5; TULU_TIGALARI
6888 0x113D6, // 113D6 ; UNKNOWN
6889 0x113D7, // 113D7..113D8; TULU_TIGALARI
6890 0x113D9, // 113D9..113E0; UNKNOWN
6891 0x113E1, // 113E1..113E2; TULU_TIGALARI
6892 0x113E3, // 113E3..113FF; UNKNOWN
6893 0x11400, // 11400..1145B; NEWA
6894 0x1145C, // 1145C ; UNKNOWN
6895 0x1145D, // 1145D..11461; NEWA
6896 0x11462, // 11462..1147F; UNKNOWN
6897 0x11480, // 11480..114C7; TIRHUTA
6898 0x114C8, // 114C8..114CF; UNKNOWN
6899 0x114D0, // 114D0..114D9; TIRHUTA
6900 0x114DA, // 114DA..1157F; UNKNOWN
6901 0x11580, // 11580..115B5; SIDDHAM
6902 0x115B6, // 115B6..115B7; UNKNOWN
6903 0x115B8, // 115B8..115DD; SIDDHAM
6904 0x115DE, // 115DE..115FF; UNKNOWN
6905 0x11600, // 11600..11644; MODI
6906 0x11645, // 11645..1164F; UNKNOWN
6907 0x11650, // 11650..11659; MODI
6908 0x1165A, // 1165A..1165F; UNKNOWN
6909 0x11660, // 11660..1166C; MONGOLIAN
6910 0x1166D, // 1166D..1167F; UNKNOWN
6911 0x11680, // 11680..116B9; TAKRI
6912 0x116BA, // 116BA..116BF; UNKNOWN
6913 0x116C0, // 116C0..116C9; TAKRI
6914 0x116CA, // 116CA..116CF; UNKNOWN
6915 0x116D0, // 116D0..116E3; MYANMAR
6916 0x116E4, // 116E4..116FF; UNKNOWN
6917 0x11700, // 11700..1171A; AHOM
6918 0x1171B, // 1171B..1171C; UNKNOWN
6919 0x1171D, // 1171D..1172B; AHOM
6920 0x1172C, // 1172C..1172F; UNKNOWN
6921 0x11730, // 11730..11746; AHOM
6922 0x11747, // 11747..117FF; UNKNOWN
6923 0x11800, // 11800..1183B; DOGRA
6924 0x1183C, // 1183C..1189F; UNKNOWN
6925 0x118A0, // 118A0..118F2; WARANG_CITI
6926 0x118F3, // 118F3..118FE; UNKNOWN
6927 0x118FF, // 118FF ; WARANG_CITI
6928 0x11900, // 11900..11906; DIVES_AKURU
6929 0x11907, // 11907..11908; UNKNOWN
6930 0x11909, // 11909 ; DIVES_AKURU
6931 0x1190A, // 1190A..1190B; UNKNOWN
6932 0x1190C, // 1190C..11913; DIVES_AKURU
6933 0x11914, // 11914 ; UNKNOWN
6934 0x11915, // 11915..11916; DIVES_AKURU
6935 0x11917, // 11917 ; UNKNOWN
6936 0x11918, // 11918..11935; DIVES_AKURU
6937 0x11936, // 11936 ; UNKNOWN
6938 0x11937, // 11937..11938; DIVES_AKURU
6939 0x11939, // 11939..1193A; UNKNOWN
6940 0x1193B, // 1193B..11946; DIVES_AKURU
6941 0x11947, // 11947..1194F; UNKNOWN
6942 0x11950, // 11950..11959; DIVES_AKURU
6943 0x1195A, // 1195A..1199F; UNKNOWN
6944 0x119A0, // 119A0..119A7; NANDINAGARI
6945 0x119A8, // 119A8..119A9; UNKNOWN
6946 0x119AA, // 119AA..119D7; NANDINAGARI
6947 0x119D8, // 119D8..119D9; UNKNOWN
6948 0x119DA, // 119DA..119E4; NANDINAGARI
6949 0x119E5, // 119E5..119FF; UNKNOWN
6950 0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE
6951 0x11A48, // 11A48..11A4F; UNKNOWN
6952 0x11A50, // 11A50..11AA2; SOYOMBO
6953 0x11AA3, // 11AA3..11AAF; UNKNOWN
6954 0x11AB0, // 11AB0..11ABF; CANADIAN_ABORIGINAL
6955 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU
6956 0x11AF9, // 11AF9..11AFF; UNKNOWN
6957 0x11B00, // 11B00..11B09; DEVANAGARI
6958 0x11B0A, // 11B0A..11B5F; UNKNOWN
6959 0x11B60, // 11B60..11B67; SHARADA
6960 0x11B68, // 11B68..11BBF; UNKNOWN
6961 0x11BC0, // 11BC0..11BE1; SUNUWAR
6962 0x11BE2, // 11BE2..11BEF; UNKNOWN
6963 0x11BF0, // 11BF0..11BF9; SUNUWAR
6964 0x11BFA, // 11BFA..11BFF; UNKNOWN
6965 0x11C00, // 11C00..11C08; BHAIKSUKI
6966 0x11C09, // 11C09 ; UNKNOWN
6967 0x11C0A, // 11C0A..11C36; BHAIKSUKI
6968 0x11C37, // 11C37 ; UNKNOWN
6969 0x11C38, // 11C38..11C45; BHAIKSUKI
6970 0x11C46, // 11C46..11C4F; UNKNOWN
6971 0x11C50, // 11C50..11C6C; BHAIKSUKI
6972 0x11C6D, // 11C6D..11C6F; UNKNOWN
6973 0x11C70, // 11C70..11C8F; MARCHEN
6974 0x11C90, // 11C90..11C91; UNKNOWN
6975 0x11C92, // 11C92..11CA7; MARCHEN
6976 0x11CA8, // 11CA8 ; UNKNOWN
6977 0x11CA9, // 11CA9..11CB6; MARCHEN
6978 0x11CB7, // 11CB7..11CFF; UNKNOWN
6979 0x11D00, // 11D00..11D06; MASARAM_GONDI
6980 0x11D07, // 11D07 ; UNKNOWN
6981 0x11D08, // 11D08..11D09; MASARAM_GONDI
6982 0x11D0A, // 11D0A ; UNKNOWN
6983 0x11D0B, // 11D0B..11D36; MASARAM_GONDI
6984 0x11D37, // 11D37..11D39; UNKNOWN
6985 0x11D3A, // 11D3A ; MASARAM_GONDI
6986 0x11D3B, // 11D3B ; UNKNOWN
6987 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI
6988 0x11D3E, // 11D3E ; UNKNOWN
6989 0x11D3F, // 11D3F..11D47; MASARAM_GONDI
6990 0x11D48, // 11D48..11D4F; UNKNOWN
6991 0x11D50, // 11D50..11D59; MASARAM_GONDI
6992 0x11D5A, // 11D5A..11D5F; UNKNOWN
6993 0x11D60, // 11D60..11D65; GUNJALA_GONDI
6994 0x11D66, // 11D66 ; UNKNOWN
6995 0x11D67, // 11D67..11D68; GUNJALA_GONDI
6996 0x11D69, // 11D69 ; UNKNOWN
6997 0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI
6998 0x11D8F, // 11D8F ; UNKNOWN
6999 0x11D90, // 11D90..11D91; GUNJALA_GONDI
7000 0x11D92, // 11D92 ; UNKNOWN
7001 0x11D93, // 11D93..11D98; GUNJALA_GONDI
7002 0x11D99, // 11D99..11D9F; UNKNOWN
7003 0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI
7004 0x11DAA, // 11DAA..11DAF; UNKNOWN
7005 0x11DB0, // 11DB0..11DDB; TOLONG_SIKI
7006 0x11DDC, // 11DDC..11DDF; UNKNOWN
7007 0x11DE0, // 11DE0..11DE9; TOLONG_SIKI
7008 0x11DEA, // 11DEA..11EDF; UNKNOWN
7009 0x11EE0, // 11EE0..11EF8; MAKASAR
7010 0x11EF9, // 11EF9..11EFF; UNKNOWN
7011 0x11F00, // 11F00..11F10; KAWI
7012 0x11F11, // 11F11 ; UNKNOWN
7013 0x11F12, // 11F12..11F3A; KAWI
7014 0x11F3B, // 11F3B..11F3D; UNKNOWN
7015 0x11F3E, // 11F3E..11F5A; KAWI
7016 0x11F5B, // 11F5B..11FAF; UNKNOWN
7017 0x11FB0, // 11FB0 ; LISU
7018 0x11FB1, // 11FB1..11FBF; UNKNOWN
7019 0x11FC0, // 11FC0..11FF1; TAMIL
7020 0x11FF2, // 11FF2..11FFE; UNKNOWN
7021 0x11FFF, // 11FFF ; TAMIL
7022 0x12000, // 12000..12399; CUNEIFORM
7023 0x1239A, // 1239A..123FF; UNKNOWN
7024 0x12400, // 12400..1246E; CUNEIFORM
7025 0x1246F, // 1246F ; UNKNOWN
7026 0x12470, // 12470..12474; CUNEIFORM
7027 0x12475, // 12475..1247F; UNKNOWN
7028 0x12480, // 12480..12543; CUNEIFORM
7029 0x12544, // 12544..12F8F; UNKNOWN
7030 0x12F90, // 12F90..12FF2; CYPRO_MINOAN
7031 0x12FF3, // 12FF3..12FFF; UNKNOWN
7032 0x13000, // 13000..13455; EGYPTIAN_HIEROGLYPHS
7033 0x13456, // 13456..1345F; UNKNOWN
7034 0x13460, // 13460..143FA; EGYPTIAN_HIEROGLYPHS
7035 0x143FB, // 143FB..143FF; UNKNOWN
7036 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS
7037 0x14647, // 14647..160FF; UNKNOWN
7038 0x16100, // 16100..16139; GURUNG_KHEMA
7039 0x1613A, // 1613A..167FF; UNKNOWN
7040 0x16800, // 16800..16A38; BAMUM
7041 0x16A39, // 16A39..16A3F; UNKNOWN
7042 0x16A40, // 16A40..16A5E; MRO
7043 0x16A5F, // 16A5F ; UNKNOWN
7044 0x16A60, // 16A60..16A69; MRO
7045 0x16A6A, // 16A6A..16A6D; UNKNOWN
7046 0x16A6E, // 16A6E..16A6F; MRO
7047 0x16A70, // 16A70..16ABE; TANGSA
7048 0x16ABF, // 16ABF ; UNKNOWN
7049 0x16AC0, // 16AC0..16AC9; TANGSA
7050 0x16ACA, // 16ACA..16ACF; UNKNOWN
7051 0x16AD0, // 16AD0..16AED; BASSA_VAH
7052 0x16AEE, // 16AEE..16AEF; UNKNOWN
7053 0x16AF0, // 16AF0..16AF5; BASSA_VAH
7054 0x16AF6, // 16AF6..16AFF; UNKNOWN
7055 0x16B00, // 16B00..16B45; PAHAWH_HMONG
7056 0x16B46, // 16B46..16B4F; UNKNOWN
7057 0x16B50, // 16B50..16B59; PAHAWH_HMONG
7058 0x16B5A, // 16B5A ; UNKNOWN
7059 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG
7060 0x16B62, // 16B62 ; UNKNOWN
7061 0x16B63, // 16B63..16B77; PAHAWH_HMONG
7062 0x16B78, // 16B78..16B7C; UNKNOWN
7063 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG
7064 0x16B90, // 16B90..16D3F; UNKNOWN
7065 0x16D40, // 16D40..16D79; KIRAT_RAI
7066 0x16D7A, // 16D7A..16E3F; UNKNOWN
7067 0x16E40, // 16E40..16E9A; MEDEFAIDRIN
7068 0x16E9B, // 16E9B..16E9F; UNKNOWN
7069 0x16EA0, // 16EA0..16EB8; BERIA_ERFE
7070 0x16EB9, // 16EB9..16EBA; UNKNOWN
7071 0x16EBB, // 16EBB..16ED3; BERIA_ERFE
7072 0x16ED4, // 16ED4..16EFF; UNKNOWN
7073 0x16F00, // 16F00..16F4A; MIAO
7074 0x16F4B, // 16F4B..16F4E; UNKNOWN
7075 0x16F4F, // 16F4F..16F87; MIAO
7076 0x16F88, // 16F88..16F8E; UNKNOWN
7077 0x16F8F, // 16F8F..16F9F; MIAO
7078 0x16FA0, // 16FA0..16FDF; UNKNOWN
7079 0x16FE0, // 16FE0 ; TANGUT
7080 0x16FE1, // 16FE1 ; NUSHU
7081 0x16FE2, // 16FE2..16FE3; HAN
7082 0x16FE4, // 16FE4 ; KHITAN_SMALL_SCRIPT
7083 0x16FE5, // 16FE5..16FEF; UNKNOWN
7084 0x16FF0, // 16FF0..16FF6; HAN
7085 0x16FF7, // 16FF7..16FFF; UNKNOWN
7086 0x17000, // 17000..18AFF; TANGUT
7087 0x18B00, // 18B00..18CD5; KHITAN_SMALL_SCRIPT
7088 0x18CD6, // 18CD6..18CFE; UNKNOWN
7089 0x18CFF, // 18CFF ; KHITAN_SMALL_SCRIPT
7090 0x18D00, // 18D00..18D1E; TANGUT
7091 0x18D1F, // 18D1F..18D7F; UNKNOWN
7092 0x18D80, // 18D80..18DF2; TANGUT
7093 0x18DF3, // 18DF3..1AFEF; UNKNOWN
7094 0x1AFF0, // 1AFF0..1AFF3; KATAKANA
7095 0x1AFF4, // 1AFF4 ; UNKNOWN
7096 0x1AFF5, // 1AFF5..1AFFB; KATAKANA
7097 0x1AFFC, // 1AFFC ; UNKNOWN
7098 0x1AFFD, // 1AFFD..1AFFE; KATAKANA
7099 0x1AFFF, // 1AFFF ; UNKNOWN
7100 0x1B000, // 1B000 ; KATAKANA
7101 0x1B001, // 1B001..1B11F; HIRAGANA
7102 0x1B120, // 1B120..1B122; KATAKANA
7103 0x1B123, // 1B123..1B131; UNKNOWN
7104 0x1B132, // 1B132 ; HIRAGANA
7105 0x1B133, // 1B133..1B14F; UNKNOWN
7106 0x1B150, // 1B150..1B152; HIRAGANA
7107 0x1B153, // 1B153..1B154; UNKNOWN
7108 0x1B155, // 1B155 ; KATAKANA
7109 0x1B156, // 1B156..1B163; UNKNOWN
7110 0x1B164, // 1B164..1B167; KATAKANA
7111 0x1B168, // 1B168..1B16F; UNKNOWN
7112 0x1B170, // 1B170..1B2FB; NUSHU
7113 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN
7114 0x1BC00, // 1BC00..1BC6A; DUPLOYAN
7115 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN
7116 0x1BC70, // 1BC70..1BC7C; DUPLOYAN
7117 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN
7118 0x1BC80, // 1BC80..1BC88; DUPLOYAN
7119 0x1BC89, // 1BC89..1BC8F; UNKNOWN
7120 0x1BC90, // 1BC90..1BC99; DUPLOYAN
7121 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN
7122 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN
7123 0x1BCA0, // 1BCA0..1BCA3; COMMON
7124 0x1BCA4, // 1BCA4..1CBFF; UNKNOWN
7125 0x1CC00, // 1CC00..1CCFC; COMMON
7126 0x1CCFD, // 1CCFD..1CCFF; UNKNOWN
7127 0x1CD00, // 1CD00..1CEB3; COMMON
7128 0x1CEB4, // 1CEB4..1CEB9; UNKNOWN
7129 0x1CEBA, // 1CEBA..1CED0; COMMON
7130 0x1CED1, // 1CED1..1CEDF; UNKNOWN
7131 0x1CEE0, // 1CEE0..1CEF0; COMMON
7132 0x1CEF1, // 1CEF1..1CEFF; UNKNOWN
7133 0x1CF00, // 1CF00..1CF2D; INHERITED
7134 0x1CF2E, // 1CF2E..1CF2F; UNKNOWN
7135 0x1CF30, // 1CF30..1CF46; INHERITED
7136 0x1CF47, // 1CF47..1CF4F; UNKNOWN
7137 0x1CF50, // 1CF50..1CFC3; COMMON
7138 0x1CFC4, // 1CFC4..1CFFF; UNKNOWN
7139 0x1D000, // 1D000..1D0F5; COMMON
7140 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN
7141 0x1D100, // 1D100..1D126; COMMON
7142 0x1D127, // 1D127..1D128; UNKNOWN
7143 0x1D129, // 1D129..1D166; COMMON
7144 0x1D167, // 1D167..1D169; INHERITED
7145 0x1D16A, // 1D16A..1D17A; COMMON
7146 0x1D17B, // 1D17B..1D182; INHERITED
7147 0x1D183, // 1D183..1D184; COMMON
7148 0x1D185, // 1D185..1D18B; INHERITED
7149 0x1D18C, // 1D18C..1D1A9; COMMON
7150 0x1D1AA, // 1D1AA..1D1AD; INHERITED
7151 0x1D1AE, // 1D1AE..1D1EA; COMMON
7152 0x1D1EB, // 1D1EB..1D1FF; UNKNOWN
7153 0x1D200, // 1D200..1D245; GREEK
7154 0x1D246, // 1D246..1D2BF; UNKNOWN
7155 0x1D2C0, // 1D2C0..1D2D3; COMMON
7156 0x1D2D4, // 1D2D4..1D2DF; UNKNOWN
7157 0x1D2E0, // 1D2E0..1D2F3; COMMON
7158 0x1D2F4, // 1D2F4..1D2FF; UNKNOWN
7159 0x1D300, // 1D300..1D356; COMMON
7160 0x1D357, // 1D357..1D35F; UNKNOWN
7161 0x1D360, // 1D360..1D378; COMMON
7162 0x1D379, // 1D379..1D3FF; UNKNOWN
7163 0x1D400, // 1D400..1D454; COMMON
7164 0x1D455, // 1D455 ; UNKNOWN
7165 0x1D456, // 1D456..1D49C; COMMON
7166 0x1D49D, // 1D49D ; UNKNOWN
7167 0x1D49E, // 1D49E..1D49F; COMMON
7168 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN
7169 0x1D4A2, // 1D4A2 ; COMMON
7170 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN
7171 0x1D4A5, // 1D4A5..1D4A6; COMMON
7172 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN
7173 0x1D4A9, // 1D4A9..1D4AC; COMMON
7174 0x1D4AD, // 1D4AD ; UNKNOWN
7175 0x1D4AE, // 1D4AE..1D4B9; COMMON
7176 0x1D4BA, // 1D4BA ; UNKNOWN
7177 0x1D4BB, // 1D4BB ; COMMON
7178 0x1D4BC, // 1D4BC ; UNKNOWN
7179 0x1D4BD, // 1D4BD..1D4C3; COMMON
7180 0x1D4C4, // 1D4C4 ; UNKNOWN
7181 0x1D4C5, // 1D4C5..1D505; COMMON
7182 0x1D506, // 1D506 ; UNKNOWN
7183 0x1D507, // 1D507..1D50A; COMMON
7184 0x1D50B, // 1D50B..1D50C; UNKNOWN
7185 0x1D50D, // 1D50D..1D514; COMMON
7186 0x1D515, // 1D515 ; UNKNOWN
7187 0x1D516, // 1D516..1D51C; COMMON
7188 0x1D51D, // 1D51D ; UNKNOWN
7189 0x1D51E, // 1D51E..1D539; COMMON
7190 0x1D53A, // 1D53A ; UNKNOWN
7191 0x1D53B, // 1D53B..1D53E; COMMON
7192 0x1D53F, // 1D53F ; UNKNOWN
7193 0x1D540, // 1D540..1D544; COMMON
7194 0x1D545, // 1D545 ; UNKNOWN
7195 0x1D546, // 1D546 ; COMMON
7196 0x1D547, // 1D547..1D549; UNKNOWN
7197 0x1D54A, // 1D54A..1D550; COMMON
7198 0x1D551, // 1D551 ; UNKNOWN
7199 0x1D552, // 1D552..1D6A5; COMMON
7200 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN
7201 0x1D6A8, // 1D6A8..1D7CB; COMMON
7202 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN
7203 0x1D7CE, // 1D7CE..1D7FF; COMMON
7204 0x1D800, // 1D800..1DA8B; SIGNWRITING
7205 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN
7206 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING
7207 0x1DAA0, // 1DAA0 ; UNKNOWN
7208 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING
7209 0x1DAB0, // 1DAB0..1DEFF; UNKNOWN
7210 0x1DF00, // 1DF00..1DF1E; LATIN
7211 0x1DF1F, // 1DF1F..1DF24; UNKNOWN
7212 0x1DF25, // 1DF25..1DF2A; LATIN
7213 0x1DF2B, // 1DF2B..1DFFF; UNKNOWN
7214 0x1E000, // 1E000..1E006; GLAGOLITIC
7215 0x1E007, // 1E007 ; UNKNOWN
7216 0x1E008, // 1E008..1E018; GLAGOLITIC
7217 0x1E019, // 1E019..1E01A; UNKNOWN
7218 0x1E01B, // 1E01B..1E021; GLAGOLITIC
7219 0x1E022, // 1E022 ; UNKNOWN
7220 0x1E023, // 1E023..1E024; GLAGOLITIC
7221 0x1E025, // 1E025 ; UNKNOWN
7222 0x1E026, // 1E026..1E02A; GLAGOLITIC
7223 0x1E02B, // 1E02B..1E02F; UNKNOWN
7224 0x1E030, // 1E030..1E06D; CYRILLIC
7225 0x1E06E, // 1E06E..1E08E; UNKNOWN
7226 0x1E08F, // 1E08F ; CYRILLIC
7227 0x1E090, // 1E090..1E0FF; UNKNOWN
7228 0x1E100, // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG
7229 0x1E12D, // 1E12D..1E12F; UNKNOWN
7230 0x1E130, // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG
7231 0x1E13E, // 1E13E..1E13F; UNKNOWN
7232 0x1E140, // 1E140..1E149; NYIAKENG_PUACHUE_HMONG
7233 0x1E14A, // 1E14A..1E14D; UNKNOWN
7234 0x1E14E, // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG
7235 0x1E150, // 1E150..1E28F; UNKNOWN
7236 0x1E290, // 1E290..1E2AE; TOTO
7237 0x1E2AF, // 1E2AF..1E2BF; UNKNOWN
7238 0x1E2C0, // 1E2C0..1E2F9; WANCHO
7239 0x1E2FA, // 1E2FA..1E2FE; UNKNOWN
7240 0x1E2FF, // 1E2FF ; WANCHO
7241 0x1E300, // 1E300..1E4CF; UNKNOWN
7242 0x1E4D0, // 1E4D0..1E4F9; NAG_MUNDARI
7243 0x1E4FA, // 1E4FA..1E5CF; UNKNOWN
7244 0x1E5D0, // 1E5D0..1E5FA; OL_ONAL
7245 0x1E5FB, // 1E5FB..1E5FE; UNKNOWN
7246 0x1E5FF, // 1E5FF ; OL_ONAL
7247 0x1E600, // 1E600..1E6BF; UNKNOWN
7248 0x1E6C0, // 1E6C0..1E6DE; TAI_YO
7249 0x1E6DF, // 1E6DF ; UNKNOWN
7250 0x1E6E0, // 1E6E0..1E6F5; TAI_YO
7251 0x1E6F6, // 1E6F6..1E6FD; UNKNOWN
7252 0x1E6FE, // 1E6FE..1E6FF; TAI_YO
7253 0x1E700, // 1E700..1E7DF; UNKNOWN
7254 0x1E7E0, // 1E7E0..1E7E6; ETHIOPIC
7255 0x1E7E7, // 1E7E7 ; UNKNOWN
7256 0x1E7E8, // 1E7E8..1E7EB; ETHIOPIC
7257 0x1E7EC, // 1E7EC ; UNKNOWN
7258 0x1E7ED, // 1E7ED..1E7EE; ETHIOPIC
7259 0x1E7EF, // 1E7EF ; UNKNOWN
7260 0x1E7F0, // 1E7F0..1E7FE; ETHIOPIC
7261 0x1E7FF, // 1E7FF ; UNKNOWN
7262 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI
7263 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN
7264 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI
7265 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN
7266 0x1E900, // 1E900..1E94B; ADLAM
7267 0x1E94C, // 1E94C..1E94F; UNKNOWN
7268 0x1E950, // 1E950..1E959; ADLAM
7269 0x1E95A, // 1E95A..1E95D; UNKNOWN
7270 0x1E95E, // 1E95E..1E95F; ADLAM
7271 0x1E960, // 1E960..1EC70; UNKNOWN
7272 0x1EC71, // 1EC71..1ECB4; COMMON
7273 0x1ECB5, // 1ECB5..1ED00; UNKNOWN
7274 0x1ED01, // 1ED01..1ED3D; COMMON
7275 0x1ED3E, // 1ED3E..1EDFF; UNKNOWN
7276 0x1EE00, // 1EE00..1EE03; ARABIC
7277 0x1EE04, // 1EE04 ; UNKNOWN
7278 0x1EE05, // 1EE05..1EE1F; ARABIC
7279 0x1EE20, // 1EE20 ; UNKNOWN
7280 0x1EE21, // 1EE21..1EE22; ARABIC
7281 0x1EE23, // 1EE23 ; UNKNOWN
7282 0x1EE24, // 1EE24 ; ARABIC
7283 0x1EE25, // 1EE25..1EE26; UNKNOWN
7284 0x1EE27, // 1EE27 ; ARABIC
7285 0x1EE28, // 1EE28 ; UNKNOWN
7286 0x1EE29, // 1EE29..1EE32; ARABIC
7287 0x1EE33, // 1EE33 ; UNKNOWN
7288 0x1EE34, // 1EE34..1EE37; ARABIC
7289 0x1EE38, // 1EE38 ; UNKNOWN
7290 0x1EE39, // 1EE39 ; ARABIC
7291 0x1EE3A, // 1EE3A ; UNKNOWN
7292 0x1EE3B, // 1EE3B ; ARABIC
7293 0x1EE3C, // 1EE3C..1EE41; UNKNOWN
7294 0x1EE42, // 1EE42 ; ARABIC
7295 0x1EE43, // 1EE43..1EE46; UNKNOWN
7296 0x1EE47, // 1EE47 ; ARABIC
7297 0x1EE48, // 1EE48 ; UNKNOWN
7298 0x1EE49, // 1EE49 ; ARABIC
7299 0x1EE4A, // 1EE4A ; UNKNOWN
7300 0x1EE4B, // 1EE4B ; ARABIC
7301 0x1EE4C, // 1EE4C ; UNKNOWN
7302 0x1EE4D, // 1EE4D..1EE4F; ARABIC
7303 0x1EE50, // 1EE50 ; UNKNOWN
7304 0x1EE51, // 1EE51..1EE52; ARABIC
7305 0x1EE53, // 1EE53 ; UNKNOWN
7306 0x1EE54, // 1EE54 ; ARABIC
7307 0x1EE55, // 1EE55..1EE56; UNKNOWN
7308 0x1EE57, // 1EE57 ; ARABIC
7309 0x1EE58, // 1EE58 ; UNKNOWN
7310 0x1EE59, // 1EE59 ; ARABIC
7311 0x1EE5A, // 1EE5A ; UNKNOWN
7312 0x1EE5B, // 1EE5B ; ARABIC
7313 0x1EE5C, // 1EE5C ; UNKNOWN
7314 0x1EE5D, // 1EE5D ; ARABIC
7315 0x1EE5E, // 1EE5E ; UNKNOWN
7316 0x1EE5F, // 1EE5F ; ARABIC
7317 0x1EE60, // 1EE60 ; UNKNOWN
7318 0x1EE61, // 1EE61..1EE62; ARABIC
7319 0x1EE63, // 1EE63 ; UNKNOWN
7320 0x1EE64, // 1EE64 ; ARABIC
7321 0x1EE65, // 1EE65..1EE66; UNKNOWN
7322 0x1EE67, // 1EE67..1EE6A; ARABIC
7323 0x1EE6B, // 1EE6B ; UNKNOWN
7324 0x1EE6C, // 1EE6C..1EE72; ARABIC
7325 0x1EE73, // 1EE73 ; UNKNOWN
7326 0x1EE74, // 1EE74..1EE77; ARABIC
7327 0x1EE78, // 1EE78 ; UNKNOWN
7328 0x1EE79, // 1EE79..1EE7C; ARABIC
7329 0x1EE7D, // 1EE7D ; UNKNOWN
7330 0x1EE7E, // 1EE7E ; ARABIC
7331 0x1EE7F, // 1EE7F ; UNKNOWN
7332 0x1EE80, // 1EE80..1EE89; ARABIC
7333 0x1EE8A, // 1EE8A ; UNKNOWN
7334 0x1EE8B, // 1EE8B..1EE9B; ARABIC
7335 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN
7336 0x1EEA1, // 1EEA1..1EEA3; ARABIC
7337 0x1EEA4, // 1EEA4 ; UNKNOWN
7338 0x1EEA5, // 1EEA5..1EEA9; ARABIC
7339 0x1EEAA, // 1EEAA ; UNKNOWN
7340 0x1EEAB, // 1EEAB..1EEBB; ARABIC
7341 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN
7342 0x1EEF0, // 1EEF0..1EEF1; ARABIC
7343 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN
7344 0x1F000, // 1F000..1F02B; COMMON
7345 0x1F02C, // 1F02C..1F02F; UNKNOWN
7346 0x1F030, // 1F030..1F093; COMMON
7347 0x1F094, // 1F094..1F09F; UNKNOWN
7348 0x1F0A0, // 1F0A0..1F0AE; COMMON
7349 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN
7350 0x1F0B1, // 1F0B1..1F0BF; COMMON
7351 0x1F0C0, // 1F0C0 ; UNKNOWN
7352 0x1F0C1, // 1F0C1..1F0CF; COMMON
7353 0x1F0D0, // 1F0D0 ; UNKNOWN
7354 0x1F0D1, // 1F0D1..1F0F5; COMMON
7355 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN
7356 0x1F100, // 1F100..1F1AD; COMMON
7357 0x1F1AE, // 1F1AE..1F1E5; UNKNOWN
7358 0x1F1E6, // 1F1E6..1F1FF; COMMON
7359 0x1F200, // 1F200 ; HIRAGANA
7360 0x1F201, // 1F201..1F202; COMMON
7361 0x1F203, // 1F203..1F20F; UNKNOWN
7362 0x1F210, // 1F210..1F23B; COMMON
7363 0x1F23C, // 1F23C..1F23F; UNKNOWN
7364 0x1F240, // 1F240..1F248; COMMON
7365 0x1F249, // 1F249..1F24F; UNKNOWN
7366 0x1F250, // 1F250..1F251; COMMON
7367 0x1F252, // 1F252..1F25F; UNKNOWN
7368 0x1F260, // 1F260..1F265; COMMON
7369 0x1F266, // 1F266..1F2FF; UNKNOWN
7370 0x1F300, // 1F300..1F6D8; COMMON
7371 0x1F6D9, // 1F6D9..1F6DB; UNKNOWN
7372 0x1F6DC, // 1F6DC..1F6EC; COMMON
7373 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN
7374 0x1F6F0, // 1F6F0..1F6FC; COMMON
7375 0x1F6FD, // 1F6FD..1F6FF; UNKNOWN
7376 0x1F700, // 1F700..1F7D9; COMMON
7377 0x1F7DA, // 1F7DA..1F7DF; UNKNOWN
7378 0x1F7E0, // 1F7E0..1F7EB; COMMON
7379 0x1F7EC, // 1F7EC..1F7EF; UNKNOWN
7380 0x1F7F0, // 1F7F0 ; COMMON
7381 0x1F7F1, // 1F7F1..1F7FF; UNKNOWN
7382 0x1F800, // 1F800..1F80B; COMMON
7383 0x1F80C, // 1F80C..1F80F; UNKNOWN
7384 0x1F810, // 1F810..1F847; COMMON
7385 0x1F848, // 1F848..1F84F; UNKNOWN
7386 0x1F850, // 1F850..1F859; COMMON
7387 0x1F85A, // 1F85A..1F85F; UNKNOWN
7388 0x1F860, // 1F860..1F887; COMMON
7389 0x1F888, // 1F888..1F88F; UNKNOWN
7390 0x1F890, // 1F890..1F8AD; COMMON
7391 0x1F8AE, // 1F8AE..1F8AF; UNKNOWN
7392 0x1F8B0, // 1F8B0..1F8BB; COMMON
7393 0x1F8BC, // 1F8BC..1F8BF; UNKNOWN
7394 0x1F8C0, // 1F8C0..1F8C1; COMMON
7395 0x1F8C2, // 1F8C2..1F8CF; UNKNOWN
7396 0x1F8D0, // 1F8D0..1F8D8; COMMON
7397 0x1F8D9, // 1F8D9..1F8FF; UNKNOWN
7398 0x1F900, // 1F900..1FA57; COMMON
7399 0x1FA58, // 1FA58..1FA5F; UNKNOWN
7400 0x1FA60, // 1FA60..1FA6D; COMMON
7401 0x1FA6E, // 1FA6E..1FA6F; UNKNOWN
7402 0x1FA70, // 1FA70..1FA7C; COMMON
7403 0x1FA7D, // 1FA7D..1FA7F; UNKNOWN
7404 0x1FA80, // 1FA80..1FA8A; COMMON
7405 0x1FA8B, // 1FA8B..1FA8D; UNKNOWN
7406 0x1FA8E, // 1FA8E..1FAC6; COMMON
7407 0x1FAC7, // 1FAC7 ; UNKNOWN
7408 0x1FAC8, // 1FAC8 ; COMMON
7409 0x1FAC9, // 1FAC9..1FACC; UNKNOWN
7410 0x1FACD, // 1FACD..1FADC; COMMON
7411 0x1FADD, // 1FADD..1FADE; UNKNOWN
7412 0x1FADF, // 1FADF..1FAEA; COMMON
7413 0x1FAEB, // 1FAEB..1FAEE; UNKNOWN
7414 0x1FAEF, // 1FAEF..1FAF8; COMMON
7415 0x1FAF9, // 1FAF9..1FAFF; UNKNOWN
7416 0x1FB00, // 1FB00..1FB92; COMMON
7417 0x1FB93, // 1FB93 ; UNKNOWN
7418 0x1FB94, // 1FB94..1FBFA; COMMON
7419 0x1FBFB, // 1FBFB..1FFFF; UNKNOWN
7420 0x20000, // 20000..2A6DF; HAN
7421 0x2A6E0, // 2A6E0..2A6FF; UNKNOWN
7422 0x2A700, // 2A700..2B81D; HAN
7423 0x2B81E, // 2B81E..2B81F; UNKNOWN
7424 0x2B820, // 2B820..2CEAD; HAN
7425 0x2CEAE, // 2CEAE..2CEAF; UNKNOWN
7426 0x2CEB0, // 2CEB0..2EBE0; HAN
7427 0x2EBE1, // 2EBE1..2EBEF; UNKNOWN
7428 0x2EBF0, // 2EBF0..2EE5D; HAN
7429 0x2EE5E, // 2EE5E..2F7FF; UNKNOWN
7430 0x2F800, // 2F800..2FA1D; HAN
7431 0x2FA1E, // 2FA1E..2FFFF; UNKNOWN
7432 0x30000, // 30000..3134A; HAN
7433 0x3134B, // 3134B..3134F; UNKNOWN
7434 0x31350, // 31350..33479; HAN
7435 0x3347A, // 3347A..E0000; UNKNOWN
7436 0xE0001, // E0001 ; COMMON
7437 0xE0002, // E0002..E001F; UNKNOWN
7438 0xE0020, // E0020..E007F; COMMON
7439 0xE0080, // E0080..E00FF; UNKNOWN
7440 0xE0100, // E0100..E01EF; INHERITED
7441 0xE01F0, // E01F0..10FFFF; UNKNOWN
7442 };
7443
7444 private static final UnicodeScript[] scripts = {
7445 COMMON, // 0000..0040
7446 LATIN, // 0041..005A
7447 COMMON, // 005B..0060
7448 LATIN, // 0061..007A
7449 COMMON, // 007B..00A9
7450 LATIN, // 00AA
7451 COMMON, // 00AB..00B9
7452 LATIN, // 00BA
7453 COMMON, // 00BB..00BF
7454 LATIN, // 00C0..00D6
7455 COMMON, // 00D7
7456 LATIN, // 00D8..00F6
7457 COMMON, // 00F7
7458 LATIN, // 00F8..02B8
7459 COMMON, // 02B9..02DF
7460 LATIN, // 02E0..02E4
7461 COMMON, // 02E5..02E9
7462 BOPOMOFO, // 02EA..02EB
7463 COMMON, // 02EC..02FF
7464 INHERITED, // 0300..036F
7465 GREEK, // 0370..0373
7466 COMMON, // 0374
7467 GREEK, // 0375..0377
7468 UNKNOWN, // 0378..0379
7469 GREEK, // 037A..037D
7470 COMMON, // 037E
7471 GREEK, // 037F
7472 UNKNOWN, // 0380..0383
7473 GREEK, // 0384
7474 COMMON, // 0385
7475 GREEK, // 0386
7476 COMMON, // 0387
7477 GREEK, // 0388..038A
7478 UNKNOWN, // 038B
7479 GREEK, // 038C
7480 UNKNOWN, // 038D
7481 GREEK, // 038E..03A1
7482 UNKNOWN, // 03A2
7483 GREEK, // 03A3..03E1
7484 COPTIC, // 03E2..03EF
7485 GREEK, // 03F0..03FF
7486 CYRILLIC, // 0400..0484
7487 INHERITED, // 0485..0486
7488 CYRILLIC, // 0487..052F
7489 UNKNOWN, // 0530
7490 ARMENIAN, // 0531..0556
7491 UNKNOWN, // 0557..0558
7492 ARMENIAN, // 0559..058A
7493 UNKNOWN, // 058B..058C
7494 ARMENIAN, // 058D..058F
7495 UNKNOWN, // 0590
7496 HEBREW, // 0591..05C7
7497 UNKNOWN, // 05C8..05CF
7498 HEBREW, // 05D0..05EA
7499 UNKNOWN, // 05EB..05EE
7500 HEBREW, // 05EF..05F4
7501 UNKNOWN, // 05F5..05FF
7502 ARABIC, // 0600..0604
7503 COMMON, // 0605
7504 ARABIC, // 0606..060B
7505 COMMON, // 060C
7506 ARABIC, // 060D..061A
7507 COMMON, // 061B
7508 ARABIC, // 061C..061E
7509 COMMON, // 061F
7510 ARABIC, // 0620..063F
7511 COMMON, // 0640
7512 ARABIC, // 0641..064A
7513 INHERITED, // 064B..0655
7514 ARABIC, // 0656..066F
7515 INHERITED, // 0670
7516 ARABIC, // 0671..06DC
7517 COMMON, // 06DD
7518 ARABIC, // 06DE..06FF
7519 SYRIAC, // 0700..070D
7520 UNKNOWN, // 070E
7521 SYRIAC, // 070F..074A
7522 UNKNOWN, // 074B..074C
7523 SYRIAC, // 074D..074F
7524 ARABIC, // 0750..077F
7525 THAANA, // 0780..07B1
7526 UNKNOWN, // 07B2..07BF
7527 NKO, // 07C0..07FA
7528 UNKNOWN, // 07FB..07FC
7529 NKO, // 07FD..07FF
7530 SAMARITAN, // 0800..082D
7531 UNKNOWN, // 082E..082F
7532 SAMARITAN, // 0830..083E
7533 UNKNOWN, // 083F
7534 MANDAIC, // 0840..085B
7535 UNKNOWN, // 085C..085D
7536 MANDAIC, // 085E
7537 UNKNOWN, // 085F
7538 SYRIAC, // 0860..086A
7539 UNKNOWN, // 086B..086F
7540 ARABIC, // 0870..0891
7541 UNKNOWN, // 0892..0896
7542 ARABIC, // 0897..08E1
7543 COMMON, // 08E2
7544 ARABIC, // 08E3..08FF
7545 DEVANAGARI, // 0900..0950
7546 INHERITED, // 0951..0954
7547 DEVANAGARI, // 0955..0963
7548 COMMON, // 0964..0965
7549 DEVANAGARI, // 0966..097F
7550 BENGALI, // 0980..0983
7551 UNKNOWN, // 0984
7552 BENGALI, // 0985..098C
7553 UNKNOWN, // 098D..098E
7554 BENGALI, // 098F..0990
7555 UNKNOWN, // 0991..0992
7556 BENGALI, // 0993..09A8
7557 UNKNOWN, // 09A9
7558 BENGALI, // 09AA..09B0
7559 UNKNOWN, // 09B1
7560 BENGALI, // 09B2
7561 UNKNOWN, // 09B3..09B5
7562 BENGALI, // 09B6..09B9
7563 UNKNOWN, // 09BA..09BB
7564 BENGALI, // 09BC..09C4
7565 UNKNOWN, // 09C5..09C6
7566 BENGALI, // 09C7..09C8
7567 UNKNOWN, // 09C9..09CA
7568 BENGALI, // 09CB..09CE
7569 UNKNOWN, // 09CF..09D6
7570 BENGALI, // 09D7
7571 UNKNOWN, // 09D8..09DB
7572 BENGALI, // 09DC..09DD
7573 UNKNOWN, // 09DE
7574 BENGALI, // 09DF..09E3
7575 UNKNOWN, // 09E4..09E5
7576 BENGALI, // 09E6..09FE
7577 UNKNOWN, // 09FF..0A00
7578 GURMUKHI, // 0A01..0A03
7579 UNKNOWN, // 0A04
7580 GURMUKHI, // 0A05..0A0A
7581 UNKNOWN, // 0A0B..0A0E
7582 GURMUKHI, // 0A0F..0A10
7583 UNKNOWN, // 0A11..0A12
7584 GURMUKHI, // 0A13..0A28
7585 UNKNOWN, // 0A29
7586 GURMUKHI, // 0A2A..0A30
7587 UNKNOWN, // 0A31
7588 GURMUKHI, // 0A32..0A33
7589 UNKNOWN, // 0A34
7590 GURMUKHI, // 0A35..0A36
7591 UNKNOWN, // 0A37
7592 GURMUKHI, // 0A38..0A39
7593 UNKNOWN, // 0A3A..0A3B
7594 GURMUKHI, // 0A3C
7595 UNKNOWN, // 0A3D
7596 GURMUKHI, // 0A3E..0A42
7597 UNKNOWN, // 0A43..0A46
7598 GURMUKHI, // 0A47..0A48
7599 UNKNOWN, // 0A49..0A4A
7600 GURMUKHI, // 0A4B..0A4D
7601 UNKNOWN, // 0A4E..0A50
7602 GURMUKHI, // 0A51
7603 UNKNOWN, // 0A52..0A58
7604 GURMUKHI, // 0A59..0A5C
7605 UNKNOWN, // 0A5D
7606 GURMUKHI, // 0A5E
7607 UNKNOWN, // 0A5F..0A65
7608 GURMUKHI, // 0A66..0A76
7609 UNKNOWN, // 0A77..0A80
7610 GUJARATI, // 0A81..0A83
7611 UNKNOWN, // 0A84
7612 GUJARATI, // 0A85..0A8D
7613 UNKNOWN, // 0A8E
7614 GUJARATI, // 0A8F..0A91
7615 UNKNOWN, // 0A92
7616 GUJARATI, // 0A93..0AA8
7617 UNKNOWN, // 0AA9
7618 GUJARATI, // 0AAA..0AB0
7619 UNKNOWN, // 0AB1
7620 GUJARATI, // 0AB2..0AB3
7621 UNKNOWN, // 0AB4
7622 GUJARATI, // 0AB5..0AB9
7623 UNKNOWN, // 0ABA..0ABB
7624 GUJARATI, // 0ABC..0AC5
7625 UNKNOWN, // 0AC6
7626 GUJARATI, // 0AC7..0AC9
7627 UNKNOWN, // 0ACA
7628 GUJARATI, // 0ACB..0ACD
7629 UNKNOWN, // 0ACE..0ACF
7630 GUJARATI, // 0AD0
7631 UNKNOWN, // 0AD1..0ADF
7632 GUJARATI, // 0AE0..0AE3
7633 UNKNOWN, // 0AE4..0AE5
7634 GUJARATI, // 0AE6..0AF1
7635 UNKNOWN, // 0AF2..0AF8
7636 GUJARATI, // 0AF9..0AFF
7637 UNKNOWN, // 0B00
7638 ORIYA, // 0B01..0B03
7639 UNKNOWN, // 0B04
7640 ORIYA, // 0B05..0B0C
7641 UNKNOWN, // 0B0D..0B0E
7642 ORIYA, // 0B0F..0B10
7643 UNKNOWN, // 0B11..0B12
7644 ORIYA, // 0B13..0B28
7645 UNKNOWN, // 0B29
7646 ORIYA, // 0B2A..0B30
7647 UNKNOWN, // 0B31
7648 ORIYA, // 0B32..0B33
7649 UNKNOWN, // 0B34
7650 ORIYA, // 0B35..0B39
7651 UNKNOWN, // 0B3A..0B3B
7652 ORIYA, // 0B3C..0B44
7653 UNKNOWN, // 0B45..0B46
7654 ORIYA, // 0B47..0B48
7655 UNKNOWN, // 0B49..0B4A
7656 ORIYA, // 0B4B..0B4D
7657 UNKNOWN, // 0B4E..0B54
7658 ORIYA, // 0B55..0B57
7659 UNKNOWN, // 0B58..0B5B
7660 ORIYA, // 0B5C..0B5D
7661 UNKNOWN, // 0B5E
7662 ORIYA, // 0B5F..0B63
7663 UNKNOWN, // 0B64..0B65
7664 ORIYA, // 0B66..0B77
7665 UNKNOWN, // 0B78..0B81
7666 TAMIL, // 0B82..0B83
7667 UNKNOWN, // 0B84
7668 TAMIL, // 0B85..0B8A
7669 UNKNOWN, // 0B8B..0B8D
7670 TAMIL, // 0B8E..0B90
7671 UNKNOWN, // 0B91
7672 TAMIL, // 0B92..0B95
7673 UNKNOWN, // 0B96..0B98
7674 TAMIL, // 0B99..0B9A
7675 UNKNOWN, // 0B9B
7676 TAMIL, // 0B9C
7677 UNKNOWN, // 0B9D
7678 TAMIL, // 0B9E..0B9F
7679 UNKNOWN, // 0BA0..0BA2
7680 TAMIL, // 0BA3..0BA4
7681 UNKNOWN, // 0BA5..0BA7
7682 TAMIL, // 0BA8..0BAA
7683 UNKNOWN, // 0BAB..0BAD
7684 TAMIL, // 0BAE..0BB9
7685 UNKNOWN, // 0BBA..0BBD
7686 TAMIL, // 0BBE..0BC2
7687 UNKNOWN, // 0BC3..0BC5
7688 TAMIL, // 0BC6..0BC8
7689 UNKNOWN, // 0BC9
7690 TAMIL, // 0BCA..0BCD
7691 UNKNOWN, // 0BCE..0BCF
7692 TAMIL, // 0BD0
7693 UNKNOWN, // 0BD1..0BD6
7694 TAMIL, // 0BD7
7695 UNKNOWN, // 0BD8..0BE5
7696 TAMIL, // 0BE6..0BFA
7697 UNKNOWN, // 0BFB..0BFF
7698 TELUGU, // 0C00..0C0C
7699 UNKNOWN, // 0C0D
7700 TELUGU, // 0C0E..0C10
7701 UNKNOWN, // 0C11
7702 TELUGU, // 0C12..0C28
7703 UNKNOWN, // 0C29
7704 TELUGU, // 0C2A..0C39
7705 UNKNOWN, // 0C3A..0C3B
7706 TELUGU, // 0C3C..0C44
7707 UNKNOWN, // 0C45
7708 TELUGU, // 0C46..0C48
7709 UNKNOWN, // 0C49
7710 TELUGU, // 0C4A..0C4D
7711 UNKNOWN, // 0C4E..0C54
7712 TELUGU, // 0C55..0C56
7713 UNKNOWN, // 0C57
7714 TELUGU, // 0C58..0C5A
7715 UNKNOWN, // 0C5B
7716 TELUGU, // 0C5C..0C5D
7717 UNKNOWN, // 0C5E..0C5F
7718 TELUGU, // 0C60..0C63
7719 UNKNOWN, // 0C64..0C65
7720 TELUGU, // 0C66..0C6F
7721 UNKNOWN, // 0C70..0C76
7722 TELUGU, // 0C77..0C7F
7723 KANNADA, // 0C80..0C8C
7724 UNKNOWN, // 0C8D
7725 KANNADA, // 0C8E..0C90
7726 UNKNOWN, // 0C91
7727 KANNADA, // 0C92..0CA8
7728 UNKNOWN, // 0CA9
7729 KANNADA, // 0CAA..0CB3
7730 UNKNOWN, // 0CB4
7731 KANNADA, // 0CB5..0CB9
7732 UNKNOWN, // 0CBA..0CBB
7733 KANNADA, // 0CBC..0CC4
7734 UNKNOWN, // 0CC5
7735 KANNADA, // 0CC6..0CC8
7736 UNKNOWN, // 0CC9
7737 KANNADA, // 0CCA..0CCD
7738 UNKNOWN, // 0CCE..0CD4
7739 KANNADA, // 0CD5..0CD6
7740 UNKNOWN, // 0CD7..0CDB
7741 KANNADA, // 0CDC..0CDE
7742 UNKNOWN, // 0CDF
7743 KANNADA, // 0CE0..0CE3
7744 UNKNOWN, // 0CE4..0CE5
7745 KANNADA, // 0CE6..0CEF
7746 UNKNOWN, // 0CF0
7747 KANNADA, // 0CF1..0CF3
7748 UNKNOWN, // 0CF4..0CFF
7749 MALAYALAM, // 0D00..0D0C
7750 UNKNOWN, // 0D0D
7751 MALAYALAM, // 0D0E..0D10
7752 UNKNOWN, // 0D11
7753 MALAYALAM, // 0D12..0D44
7754 UNKNOWN, // 0D45
7755 MALAYALAM, // 0D46..0D48
7756 UNKNOWN, // 0D49
7757 MALAYALAM, // 0D4A..0D4F
7758 UNKNOWN, // 0D50..0D53
7759 MALAYALAM, // 0D54..0D63
7760 UNKNOWN, // 0D64..0D65
7761 MALAYALAM, // 0D66..0D7F
7762 UNKNOWN, // 0D80
7763 SINHALA, // 0D81..0D83
7764 UNKNOWN, // 0D84
7765 SINHALA, // 0D85..0D96
7766 UNKNOWN, // 0D97..0D99
7767 SINHALA, // 0D9A..0DB1
7768 UNKNOWN, // 0DB2
7769 SINHALA, // 0DB3..0DBB
7770 UNKNOWN, // 0DBC
7771 SINHALA, // 0DBD
7772 UNKNOWN, // 0DBE..0DBF
7773 SINHALA, // 0DC0..0DC6
7774 UNKNOWN, // 0DC7..0DC9
7775 SINHALA, // 0DCA
7776 UNKNOWN, // 0DCB..0DCE
7777 SINHALA, // 0DCF..0DD4
7778 UNKNOWN, // 0DD5
7779 SINHALA, // 0DD6
7780 UNKNOWN, // 0DD7
7781 SINHALA, // 0DD8..0DDF
7782 UNKNOWN, // 0DE0..0DE5
7783 SINHALA, // 0DE6..0DEF
7784 UNKNOWN, // 0DF0..0DF1
7785 SINHALA, // 0DF2..0DF4
7786 UNKNOWN, // 0DF5..0E00
7787 THAI, // 0E01..0E3A
7788 UNKNOWN, // 0E3B..0E3E
7789 COMMON, // 0E3F
7790 THAI, // 0E40..0E5B
7791 UNKNOWN, // 0E5C..0E80
7792 LAO, // 0E81..0E82
7793 UNKNOWN, // 0E83
7794 LAO, // 0E84
7795 UNKNOWN, // 0E85
7796 LAO, // 0E86..0E8A
7797 UNKNOWN, // 0E8B
7798 LAO, // 0E8C..0EA3
7799 UNKNOWN, // 0EA4
7800 LAO, // 0EA5
7801 UNKNOWN, // 0EA6
7802 LAO, // 0EA7..0EBD
7803 UNKNOWN, // 0EBE..0EBF
7804 LAO, // 0EC0..0EC4
7805 UNKNOWN, // 0EC5
7806 LAO, // 0EC6
7807 UNKNOWN, // 0EC7
7808 LAO, // 0EC8..0ECE
7809 UNKNOWN, // 0ECF
7810 LAO, // 0ED0..0ED9
7811 UNKNOWN, // 0EDA..0EDB
7812 LAO, // 0EDC..0EDF
7813 UNKNOWN, // 0EE0..0EFF
7814 TIBETAN, // 0F00..0F47
7815 UNKNOWN, // 0F48
7816 TIBETAN, // 0F49..0F6C
7817 UNKNOWN, // 0F6D..0F70
7818 TIBETAN, // 0F71..0F97
7819 UNKNOWN, // 0F98
7820 TIBETAN, // 0F99..0FBC
7821 UNKNOWN, // 0FBD
7822 TIBETAN, // 0FBE..0FCC
7823 UNKNOWN, // 0FCD
7824 TIBETAN, // 0FCE..0FD4
7825 COMMON, // 0FD5..0FD8
7826 TIBETAN, // 0FD9..0FDA
7827 UNKNOWN, // 0FDB..0FFF
7828 MYANMAR, // 1000..109F
7829 GEORGIAN, // 10A0..10C5
7830 UNKNOWN, // 10C6
7831 GEORGIAN, // 10C7
7832 UNKNOWN, // 10C8..10CC
7833 GEORGIAN, // 10CD
7834 UNKNOWN, // 10CE..10CF
7835 GEORGIAN, // 10D0..10FA
7836 COMMON, // 10FB
7837 GEORGIAN, // 10FC..10FF
7838 HANGUL, // 1100..11FF
7839 ETHIOPIC, // 1200..1248
7840 UNKNOWN, // 1249
7841 ETHIOPIC, // 124A..124D
7842 UNKNOWN, // 124E..124F
7843 ETHIOPIC, // 1250..1256
7844 UNKNOWN, // 1257
7845 ETHIOPIC, // 1258
7846 UNKNOWN, // 1259
7847 ETHIOPIC, // 125A..125D
7848 UNKNOWN, // 125E..125F
7849 ETHIOPIC, // 1260..1288
7850 UNKNOWN, // 1289
7851 ETHIOPIC, // 128A..128D
7852 UNKNOWN, // 128E..128F
7853 ETHIOPIC, // 1290..12B0
7854 UNKNOWN, // 12B1
7855 ETHIOPIC, // 12B2..12B5
7856 UNKNOWN, // 12B6..12B7
7857 ETHIOPIC, // 12B8..12BE
7858 UNKNOWN, // 12BF
7859 ETHIOPIC, // 12C0
7860 UNKNOWN, // 12C1
7861 ETHIOPIC, // 12C2..12C5
7862 UNKNOWN, // 12C6..12C7
7863 ETHIOPIC, // 12C8..12D6
7864 UNKNOWN, // 12D7
7865 ETHIOPIC, // 12D8..1310
7866 UNKNOWN, // 1311
7867 ETHIOPIC, // 1312..1315
7868 UNKNOWN, // 1316..1317
7869 ETHIOPIC, // 1318..135A
7870 UNKNOWN, // 135B..135C
7871 ETHIOPIC, // 135D..137C
7872 UNKNOWN, // 137D..137F
7873 ETHIOPIC, // 1380..1399
7874 UNKNOWN, // 139A..139F
7875 CHEROKEE, // 13A0..13F5
7876 UNKNOWN, // 13F6..13F7
7877 CHEROKEE, // 13F8..13FD
7878 UNKNOWN, // 13FE..13FF
7879 CANADIAN_ABORIGINAL, // 1400..167F
7880 OGHAM, // 1680..169C
7881 UNKNOWN, // 169D..169F
7882 RUNIC, // 16A0..16EA
7883 COMMON, // 16EB..16ED
7884 RUNIC, // 16EE..16F8
7885 UNKNOWN, // 16F9..16FF
7886 TAGALOG, // 1700..1715
7887 UNKNOWN, // 1716..171E
7888 TAGALOG, // 171F
7889 HANUNOO, // 1720..1734
7890 COMMON, // 1735..1736
7891 UNKNOWN, // 1737..173F
7892 BUHID, // 1740..1753
7893 UNKNOWN, // 1754..175F
7894 TAGBANWA, // 1760..176C
7895 UNKNOWN, // 176D
7896 TAGBANWA, // 176E..1770
7897 UNKNOWN, // 1771
7898 TAGBANWA, // 1772..1773
7899 UNKNOWN, // 1774..177F
7900 KHMER, // 1780..17DD
7901 UNKNOWN, // 17DE..17DF
7902 KHMER, // 17E0..17E9
7903 UNKNOWN, // 17EA..17EF
7904 KHMER, // 17F0..17F9
7905 UNKNOWN, // 17FA..17FF
7906 MONGOLIAN, // 1800..1801
7907 COMMON, // 1802..1803
7908 MONGOLIAN, // 1804
7909 COMMON, // 1805
7910 MONGOLIAN, // 1806..1819
7911 UNKNOWN, // 181A..181F
7912 MONGOLIAN, // 1820..1878
7913 UNKNOWN, // 1879..187F
7914 MONGOLIAN, // 1880..18AA
7915 UNKNOWN, // 18AB..18AF
7916 CANADIAN_ABORIGINAL, // 18B0..18F5
7917 UNKNOWN, // 18F6..18FF
7918 LIMBU, // 1900..191E
7919 UNKNOWN, // 191F
7920 LIMBU, // 1920..192B
7921 UNKNOWN, // 192C..192F
7922 LIMBU, // 1930..193B
7923 UNKNOWN, // 193C..193F
7924 LIMBU, // 1940
7925 UNKNOWN, // 1941..1943
7926 LIMBU, // 1944..194F
7927 TAI_LE, // 1950..196D
7928 UNKNOWN, // 196E..196F
7929 TAI_LE, // 1970..1974
7930 UNKNOWN, // 1975..197F
7931 NEW_TAI_LUE, // 1980..19AB
7932 UNKNOWN, // 19AC..19AF
7933 NEW_TAI_LUE, // 19B0..19C9
7934 UNKNOWN, // 19CA..19CF
7935 NEW_TAI_LUE, // 19D0..19DA
7936 UNKNOWN, // 19DB..19DD
7937 NEW_TAI_LUE, // 19DE..19DF
7938 KHMER, // 19E0..19FF
7939 BUGINESE, // 1A00..1A1B
7940 UNKNOWN, // 1A1C..1A1D
7941 BUGINESE, // 1A1E..1A1F
7942 TAI_THAM, // 1A20..1A5E
7943 UNKNOWN, // 1A5F
7944 TAI_THAM, // 1A60..1A7C
7945 UNKNOWN, // 1A7D..1A7E
7946 TAI_THAM, // 1A7F..1A89
7947 UNKNOWN, // 1A8A..1A8F
7948 TAI_THAM, // 1A90..1A99
7949 UNKNOWN, // 1A9A..1A9F
7950 TAI_THAM, // 1AA0..1AAD
7951 UNKNOWN, // 1AAE..1AAF
7952 INHERITED, // 1AB0..1ADD
7953 UNKNOWN, // 1ADE..1ADF
7954 INHERITED, // 1AE0..1AEB
7955 UNKNOWN, // 1AEC..1AFF
7956 BALINESE, // 1B00..1B4C
7957 UNKNOWN, // 1B4D
7958 BALINESE, // 1B4E..1B7F
7959 SUNDANESE, // 1B80..1BBF
7960 BATAK, // 1BC0..1BF3
7961 UNKNOWN, // 1BF4..1BFB
7962 BATAK, // 1BFC..1BFF
7963 LEPCHA, // 1C00..1C37
7964 UNKNOWN, // 1C38..1C3A
7965 LEPCHA, // 1C3B..1C49
7966 UNKNOWN, // 1C4A..1C4C
7967 LEPCHA, // 1C4D..1C4F
7968 OL_CHIKI, // 1C50..1C7F
7969 CYRILLIC, // 1C80..1C8A
7970 UNKNOWN, // 1C8B..1C8F
7971 GEORGIAN, // 1C90..1CBA
7972 UNKNOWN, // 1CBB..1CBC
7973 GEORGIAN, // 1CBD..1CBF
7974 SUNDANESE, // 1CC0..1CC7
7975 UNKNOWN, // 1CC8..1CCF
7976 INHERITED, // 1CD0..1CD2
7977 COMMON, // 1CD3
7978 INHERITED, // 1CD4..1CE0
7979 COMMON, // 1CE1
7980 INHERITED, // 1CE2..1CE8
7981 COMMON, // 1CE9..1CEC
7982 INHERITED, // 1CED
7983 COMMON, // 1CEE..1CF3
7984 INHERITED, // 1CF4
7985 COMMON, // 1CF5..1CF7
7986 INHERITED, // 1CF8..1CF9
7987 COMMON, // 1CFA
7988 UNKNOWN, // 1CFB..1CFF
7989 LATIN, // 1D00..1D25
7990 GREEK, // 1D26..1D2A
7991 CYRILLIC, // 1D2B
7992 LATIN, // 1D2C..1D5C
7993 GREEK, // 1D5D..1D61
7994 LATIN, // 1D62..1D65
7995 GREEK, // 1D66..1D6A
7996 LATIN, // 1D6B..1D77
7997 CYRILLIC, // 1D78
7998 LATIN, // 1D79..1DBE
7999 GREEK, // 1DBF
8000 INHERITED, // 1DC0..1DFF
8001 LATIN, // 1E00..1EFF
8002 GREEK, // 1F00..1F15
8003 UNKNOWN, // 1F16..1F17
8004 GREEK, // 1F18..1F1D
8005 UNKNOWN, // 1F1E..1F1F
8006 GREEK, // 1F20..1F45
8007 UNKNOWN, // 1F46..1F47
8008 GREEK, // 1F48..1F4D
8009 UNKNOWN, // 1F4E..1F4F
8010 GREEK, // 1F50..1F57
8011 UNKNOWN, // 1F58
8012 GREEK, // 1F59
8013 UNKNOWN, // 1F5A
8014 GREEK, // 1F5B
8015 UNKNOWN, // 1F5C
8016 GREEK, // 1F5D
8017 UNKNOWN, // 1F5E
8018 GREEK, // 1F5F..1F7D
8019 UNKNOWN, // 1F7E..1F7F
8020 GREEK, // 1F80..1FB4
8021 UNKNOWN, // 1FB5
8022 GREEK, // 1FB6..1FC4
8023 UNKNOWN, // 1FC5
8024 GREEK, // 1FC6..1FD3
8025 UNKNOWN, // 1FD4..1FD5
8026 GREEK, // 1FD6..1FDB
8027 UNKNOWN, // 1FDC
8028 GREEK, // 1FDD..1FEF
8029 UNKNOWN, // 1FF0..1FF1
8030 GREEK, // 1FF2..1FF4
8031 UNKNOWN, // 1FF5
8032 GREEK, // 1FF6..1FFE
8033 UNKNOWN, // 1FFF
8034 COMMON, // 2000..200B
8035 INHERITED, // 200C..200D
8036 COMMON, // 200E..2064
8037 UNKNOWN, // 2065
8038 COMMON, // 2066..2070
8039 LATIN, // 2071
8040 UNKNOWN, // 2072..2073
8041 COMMON, // 2074..207E
8042 LATIN, // 207F
8043 COMMON, // 2080..208E
8044 UNKNOWN, // 208F
8045 LATIN, // 2090..209C
8046 UNKNOWN, // 209D..209F
8047 COMMON, // 20A0..20C1
8048 UNKNOWN, // 20C2..20CF
8049 INHERITED, // 20D0..20F0
8050 UNKNOWN, // 20F1..20FF
8051 COMMON, // 2100..2125
8052 GREEK, // 2126
8053 COMMON, // 2127..2129
8054 LATIN, // 212A..212B
8055 COMMON, // 212C..2131
8056 LATIN, // 2132
8057 COMMON, // 2133..214D
8058 LATIN, // 214E
8059 COMMON, // 214F..215F
8060 LATIN, // 2160..2188
8061 COMMON, // 2189..218B
8062 UNKNOWN, // 218C..218F
8063 COMMON, // 2190..2429
8064 UNKNOWN, // 242A..243F
8065 COMMON, // 2440..244A
8066 UNKNOWN, // 244B..245F
8067 COMMON, // 2460..27FF
8068 BRAILLE, // 2800..28FF
8069 COMMON, // 2900..2B73
8070 UNKNOWN, // 2B74..2B75
8071 COMMON, // 2B76..2BFF
8072 GLAGOLITIC, // 2C00..2C5F
8073 LATIN, // 2C60..2C7F
8074 COPTIC, // 2C80..2CF3
8075 UNKNOWN, // 2CF4..2CF8
8076 COPTIC, // 2CF9..2CFF
8077 GEORGIAN, // 2D00..2D25
8078 UNKNOWN, // 2D26
8079 GEORGIAN, // 2D27
8080 UNKNOWN, // 2D28..2D2C
8081 GEORGIAN, // 2D2D
8082 UNKNOWN, // 2D2E..2D2F
8083 TIFINAGH, // 2D30..2D67
8084 UNKNOWN, // 2D68..2D6E
8085 TIFINAGH, // 2D6F..2D70
8086 UNKNOWN, // 2D71..2D7E
8087 TIFINAGH, // 2D7F
8088 ETHIOPIC, // 2D80..2D96
8089 UNKNOWN, // 2D97..2D9F
8090 ETHIOPIC, // 2DA0..2DA6
8091 UNKNOWN, // 2DA7
8092 ETHIOPIC, // 2DA8..2DAE
8093 UNKNOWN, // 2DAF
8094 ETHIOPIC, // 2DB0..2DB6
8095 UNKNOWN, // 2DB7
8096 ETHIOPIC, // 2DB8..2DBE
8097 UNKNOWN, // 2DBF
8098 ETHIOPIC, // 2DC0..2DC6
8099 UNKNOWN, // 2DC7
8100 ETHIOPIC, // 2DC8..2DCE
8101 UNKNOWN, // 2DCF
8102 ETHIOPIC, // 2DD0..2DD6
8103 UNKNOWN, // 2DD7
8104 ETHIOPIC, // 2DD8..2DDE
8105 UNKNOWN, // 2DDF
8106 CYRILLIC, // 2DE0..2DFF
8107 COMMON, // 2E00..2E5D
8108 UNKNOWN, // 2E5E..2E7F
8109 HAN, // 2E80..2E99
8110 UNKNOWN, // 2E9A
8111 HAN, // 2E9B..2EF3
8112 UNKNOWN, // 2EF4..2EFF
8113 HAN, // 2F00..2FD5
8114 UNKNOWN, // 2FD6..2FEF
8115 COMMON, // 2FF0..3004
8116 HAN, // 3005
8117 COMMON, // 3006
8118 HAN, // 3007
8119 COMMON, // 3008..3020
8120 HAN, // 3021..3029
8121 INHERITED, // 302A..302D
8122 HANGUL, // 302E..302F
8123 COMMON, // 3030..3037
8124 HAN, // 3038..303B
8125 COMMON, // 303C..303F
8126 UNKNOWN, // 3040
8127 HIRAGANA, // 3041..3096
8128 UNKNOWN, // 3097..3098
8129 INHERITED, // 3099..309A
8130 COMMON, // 309B..309C
8131 HIRAGANA, // 309D..309F
8132 COMMON, // 30A0
8133 KATAKANA, // 30A1..30FA
8134 COMMON, // 30FB..30FC
8135 KATAKANA, // 30FD..30FF
8136 UNKNOWN, // 3100..3104
8137 BOPOMOFO, // 3105..312F
8138 UNKNOWN, // 3130
8139 HANGUL, // 3131..318E
8140 UNKNOWN, // 318F
8141 COMMON, // 3190..319F
8142 BOPOMOFO, // 31A0..31BF
8143 COMMON, // 31C0..31E5
8144 UNKNOWN, // 31E6..31EE
8145 COMMON, // 31EF
8146 KATAKANA, // 31F0..31FF
8147 HANGUL, // 3200..321E
8148 UNKNOWN, // 321F
8149 COMMON, // 3220..325F
8150 HANGUL, // 3260..327E
8151 COMMON, // 327F..32CF
8152 KATAKANA, // 32D0..32FE
8153 COMMON, // 32FF
8154 KATAKANA, // 3300..3357
8155 COMMON, // 3358..33FF
8156 HAN, // 3400..4DBF
8157 COMMON, // 4DC0..4DFF
8158 HAN, // 4E00..9FFF
8159 YI, // A000..A48C
8160 UNKNOWN, // A48D..A48F
8161 YI, // A490..A4C6
8162 UNKNOWN, // A4C7..A4CF
8163 LISU, // A4D0..A4FF
8164 VAI, // A500..A62B
8165 UNKNOWN, // A62C..A63F
8166 CYRILLIC, // A640..A69F
8167 BAMUM, // A6A0..A6F7
8168 UNKNOWN, // A6F8..A6FF
8169 COMMON, // A700..A721
8170 LATIN, // A722..A787
8171 COMMON, // A788..A78A
8172 LATIN, // A78B..A7DC
8173 UNKNOWN, // A7DD..A7F0
8174 LATIN, // A7F1..A7FF
8175 SYLOTI_NAGRI, // A800..A82C
8176 UNKNOWN, // A82D..A82F
8177 COMMON, // A830..A839
8178 UNKNOWN, // A83A..A83F
8179 PHAGS_PA, // A840..A877
8180 UNKNOWN, // A878..A87F
8181 SAURASHTRA, // A880..A8C5
8182 UNKNOWN, // A8C6..A8CD
8183 SAURASHTRA, // A8CE..A8D9
8184 UNKNOWN, // A8DA..A8DF
8185 DEVANAGARI, // A8E0..A8FF
8186 KAYAH_LI, // A900..A92D
8187 COMMON, // A92E
8188 KAYAH_LI, // A92F
8189 REJANG, // A930..A953
8190 UNKNOWN, // A954..A95E
8191 REJANG, // A95F
8192 HANGUL, // A960..A97C
8193 UNKNOWN, // A97D..A97F
8194 JAVANESE, // A980..A9CD
8195 UNKNOWN, // A9CE
8196 COMMON, // A9CF
8197 JAVANESE, // A9D0..A9D9
8198 UNKNOWN, // A9DA..A9DD
8199 JAVANESE, // A9DE..A9DF
8200 MYANMAR, // A9E0..A9FE
8201 UNKNOWN, // A9FF
8202 CHAM, // AA00..AA36
8203 UNKNOWN, // AA37..AA3F
8204 CHAM, // AA40..AA4D
8205 UNKNOWN, // AA4E..AA4F
8206 CHAM, // AA50..AA59
8207 UNKNOWN, // AA5A..AA5B
8208 CHAM, // AA5C..AA5F
8209 MYANMAR, // AA60..AA7F
8210 TAI_VIET, // AA80..AAC2
8211 UNKNOWN, // AAC3..AADA
8212 TAI_VIET, // AADB..AADF
8213 MEETEI_MAYEK, // AAE0..AAF6
8214 UNKNOWN, // AAF7..AB00
8215 ETHIOPIC, // AB01..AB06
8216 UNKNOWN, // AB07..AB08
8217 ETHIOPIC, // AB09..AB0E
8218 UNKNOWN, // AB0F..AB10
8219 ETHIOPIC, // AB11..AB16
8220 UNKNOWN, // AB17..AB1F
8221 ETHIOPIC, // AB20..AB26
8222 UNKNOWN, // AB27
8223 ETHIOPIC, // AB28..AB2E
8224 UNKNOWN, // AB2F
8225 LATIN, // AB30..AB5A
8226 COMMON, // AB5B
8227 LATIN, // AB5C..AB64
8228 GREEK, // AB65
8229 LATIN, // AB66..AB69
8230 COMMON, // AB6A..AB6B
8231 UNKNOWN, // AB6C..AB6F
8232 CHEROKEE, // AB70..ABBF
8233 MEETEI_MAYEK, // ABC0..ABED
8234 UNKNOWN, // ABEE..ABEF
8235 MEETEI_MAYEK, // ABF0..ABF9
8236 UNKNOWN, // ABFA..ABFF
8237 HANGUL, // AC00..D7A3
8238 UNKNOWN, // D7A4..D7AF
8239 HANGUL, // D7B0..D7C6
8240 UNKNOWN, // D7C7..D7CA
8241 HANGUL, // D7CB..D7FB
8242 UNKNOWN, // D7FC..F8FF
8243 HAN, // F900..FA6D
8244 UNKNOWN, // FA6E..FA6F
8245 HAN, // FA70..FAD9
8246 UNKNOWN, // FADA..FAFF
8247 LATIN, // FB00..FB06
8248 UNKNOWN, // FB07..FB12
8249 ARMENIAN, // FB13..FB17
8250 UNKNOWN, // FB18..FB1C
8251 HEBREW, // FB1D..FB36
8252 UNKNOWN, // FB37
8253 HEBREW, // FB38..FB3C
8254 UNKNOWN, // FB3D
8255 HEBREW, // FB3E
8256 UNKNOWN, // FB3F
8257 HEBREW, // FB40..FB41
8258 UNKNOWN, // FB42
8259 HEBREW, // FB43..FB44
8260 UNKNOWN, // FB45
8261 HEBREW, // FB46..FB4F
8262 ARABIC, // FB50..FD3D
8263 COMMON, // FD3E..FD3F
8264 ARABIC, // FD40..FDCF
8265 UNKNOWN, // FDD0..FDEF
8266 ARABIC, // FDF0..FDFF
8267 INHERITED, // FE00..FE0F
8268 COMMON, // FE10..FE19
8269 UNKNOWN, // FE1A..FE1F
8270 INHERITED, // FE20..FE2D
8271 CYRILLIC, // FE2E..FE2F
8272 COMMON, // FE30..FE52
8273 UNKNOWN, // FE53
8274 COMMON, // FE54..FE66
8275 UNKNOWN, // FE67
8276 COMMON, // FE68..FE6B
8277 UNKNOWN, // FE6C..FE6F
8278 ARABIC, // FE70..FE74
8279 UNKNOWN, // FE75
8280 ARABIC, // FE76..FEFC
8281 UNKNOWN, // FEFD..FEFE
8282 COMMON, // FEFF
8283 UNKNOWN, // FF00
8284 COMMON, // FF01..FF20
8285 LATIN, // FF21..FF3A
8286 COMMON, // FF3B..FF40
8287 LATIN, // FF41..FF5A
8288 COMMON, // FF5B..FF65
8289 KATAKANA, // FF66..FF6F
8290 COMMON, // FF70
8291 KATAKANA, // FF71..FF9D
8292 COMMON, // FF9E..FF9F
8293 HANGUL, // FFA0..FFBE
8294 UNKNOWN, // FFBF..FFC1
8295 HANGUL, // FFC2..FFC7
8296 UNKNOWN, // FFC8..FFC9
8297 HANGUL, // FFCA..FFCF
8298 UNKNOWN, // FFD0..FFD1
8299 HANGUL, // FFD2..FFD7
8300 UNKNOWN, // FFD8..FFD9
8301 HANGUL, // FFDA..FFDC
8302 UNKNOWN, // FFDD..FFDF
8303 COMMON, // FFE0..FFE6
8304 UNKNOWN, // FFE7
8305 COMMON, // FFE8..FFEE
8306 UNKNOWN, // FFEF..FFF8
8307 COMMON, // FFF9..FFFD
8308 UNKNOWN, // FFFE..FFFF
8309 LINEAR_B, // 10000..1000B
8310 UNKNOWN, // 1000C
8311 LINEAR_B, // 1000D..10026
8312 UNKNOWN, // 10027
8313 LINEAR_B, // 10028..1003A
8314 UNKNOWN, // 1003B
8315 LINEAR_B, // 1003C..1003D
8316 UNKNOWN, // 1003E
8317 LINEAR_B, // 1003F..1004D
8318 UNKNOWN, // 1004E..1004F
8319 LINEAR_B, // 10050..1005D
8320 UNKNOWN, // 1005E..1007F
8321 LINEAR_B, // 10080..100FA
8322 UNKNOWN, // 100FB..100FF
8323 COMMON, // 10100..10102
8324 UNKNOWN, // 10103..10106
8325 COMMON, // 10107..10133
8326 UNKNOWN, // 10134..10136
8327 COMMON, // 10137..1013F
8328 GREEK, // 10140..1018E
8329 UNKNOWN, // 1018F
8330 COMMON, // 10190..1019C
8331 UNKNOWN, // 1019D..1019F
8332 GREEK, // 101A0
8333 UNKNOWN, // 101A1..101CF
8334 COMMON, // 101D0..101FC
8335 INHERITED, // 101FD
8336 UNKNOWN, // 101FE..1027F
8337 LYCIAN, // 10280..1029C
8338 UNKNOWN, // 1029D..1029F
8339 CARIAN, // 102A0..102D0
8340 UNKNOWN, // 102D1..102DF
8341 INHERITED, // 102E0
8342 COMMON, // 102E1..102FB
8343 UNKNOWN, // 102FC..102FF
8344 OLD_ITALIC, // 10300..10323
8345 UNKNOWN, // 10324..1032C
8346 OLD_ITALIC, // 1032D..1032F
8347 GOTHIC, // 10330..1034A
8348 UNKNOWN, // 1034B..1034F
8349 OLD_PERMIC, // 10350..1037A
8350 UNKNOWN, // 1037B..1037F
8351 UGARITIC, // 10380..1039D
8352 UNKNOWN, // 1039E
8353 UGARITIC, // 1039F
8354 OLD_PERSIAN, // 103A0..103C3
8355 UNKNOWN, // 103C4..103C7
8356 OLD_PERSIAN, // 103C8..103D5
8357 UNKNOWN, // 103D6..103FF
8358 DESERET, // 10400..1044F
8359 SHAVIAN, // 10450..1047F
8360 OSMANYA, // 10480..1049D
8361 UNKNOWN, // 1049E..1049F
8362 OSMANYA, // 104A0..104A9
8363 UNKNOWN, // 104AA..104AF
8364 OSAGE, // 104B0..104D3
8365 UNKNOWN, // 104D4..104D7
8366 OSAGE, // 104D8..104FB
8367 UNKNOWN, // 104FC..104FF
8368 ELBASAN, // 10500..10527
8369 UNKNOWN, // 10528..1052F
8370 CAUCASIAN_ALBANIAN, // 10530..10563
8371 UNKNOWN, // 10564..1056E
8372 CAUCASIAN_ALBANIAN, // 1056F
8373 VITHKUQI, // 10570..1057A
8374 UNKNOWN, // 1057B
8375 VITHKUQI, // 1057C..1058A
8376 UNKNOWN, // 1058B
8377 VITHKUQI, // 1058C..10592
8378 UNKNOWN, // 10593
8379 VITHKUQI, // 10594..10595
8380 UNKNOWN, // 10596
8381 VITHKUQI, // 10597..105A1
8382 UNKNOWN, // 105A2
8383 VITHKUQI, // 105A3..105B1
8384 UNKNOWN, // 105B2
8385 VITHKUQI, // 105B3..105B9
8386 UNKNOWN, // 105BA
8387 VITHKUQI, // 105BB..105BC
8388 UNKNOWN, // 105BD..105BF
8389 TODHRI, // 105C0..105F3
8390 UNKNOWN, // 105F4..105FF
8391 LINEAR_A, // 10600..10736
8392 UNKNOWN, // 10737..1073F
8393 LINEAR_A, // 10740..10755
8394 UNKNOWN, // 10756..1075F
8395 LINEAR_A, // 10760..10767
8396 UNKNOWN, // 10768..1077F
8397 LATIN, // 10780..10785
8398 UNKNOWN, // 10786
8399 LATIN, // 10787..107B0
8400 UNKNOWN, // 107B1
8401 LATIN, // 107B2..107BA
8402 UNKNOWN, // 107BB..107FF
8403 CYPRIOT, // 10800..10805
8404 UNKNOWN, // 10806..10807
8405 CYPRIOT, // 10808
8406 UNKNOWN, // 10809
8407 CYPRIOT, // 1080A..10835
8408 UNKNOWN, // 10836
8409 CYPRIOT, // 10837..10838
8410 UNKNOWN, // 10839..1083B
8411 CYPRIOT, // 1083C
8412 UNKNOWN, // 1083D..1083E
8413 CYPRIOT, // 1083F
8414 IMPERIAL_ARAMAIC, // 10840..10855
8415 UNKNOWN, // 10856
8416 IMPERIAL_ARAMAIC, // 10857..1085F
8417 PALMYRENE, // 10860..1087F
8418 NABATAEAN, // 10880..1089E
8419 UNKNOWN, // 1089F..108A6
8420 NABATAEAN, // 108A7..108AF
8421 UNKNOWN, // 108B0..108DF
8422 HATRAN, // 108E0..108F2
8423 UNKNOWN, // 108F3
8424 HATRAN, // 108F4..108F5
8425 UNKNOWN, // 108F6..108FA
8426 HATRAN, // 108FB..108FF
8427 PHOENICIAN, // 10900..1091B
8428 UNKNOWN, // 1091C..1091E
8429 PHOENICIAN, // 1091F
8430 LYDIAN, // 10920..10939
8431 UNKNOWN, // 1093A..1093E
8432 LYDIAN, // 1093F
8433 SIDETIC, // 10940..10959
8434 UNKNOWN, // 1095A..1097F
8435 MEROITIC_HIEROGLYPHS, // 10980..1099F
8436 MEROITIC_CURSIVE, // 109A0..109B7
8437 UNKNOWN, // 109B8..109BB
8438 MEROITIC_CURSIVE, // 109BC..109CF
8439 UNKNOWN, // 109D0..109D1
8440 MEROITIC_CURSIVE, // 109D2..109FF
8441 KHAROSHTHI, // 10A00..10A03
8442 UNKNOWN, // 10A04
8443 KHAROSHTHI, // 10A05..10A06
8444 UNKNOWN, // 10A07..10A0B
8445 KHAROSHTHI, // 10A0C..10A13
8446 UNKNOWN, // 10A14
8447 KHAROSHTHI, // 10A15..10A17
8448 UNKNOWN, // 10A18
8449 KHAROSHTHI, // 10A19..10A35
8450 UNKNOWN, // 10A36..10A37
8451 KHAROSHTHI, // 10A38..10A3A
8452 UNKNOWN, // 10A3B..10A3E
8453 KHAROSHTHI, // 10A3F..10A48
8454 UNKNOWN, // 10A49..10A4F
8455 KHAROSHTHI, // 10A50..10A58
8456 UNKNOWN, // 10A59..10A5F
8457 OLD_SOUTH_ARABIAN, // 10A60..10A7F
8458 OLD_NORTH_ARABIAN, // 10A80..10A9F
8459 UNKNOWN, // 10AA0..10ABF
8460 MANICHAEAN, // 10AC0..10AE6
8461 UNKNOWN, // 10AE7..10AEA
8462 MANICHAEAN, // 10AEB..10AF6
8463 UNKNOWN, // 10AF7..10AFF
8464 AVESTAN, // 10B00..10B35
8465 UNKNOWN, // 10B36..10B38
8466 AVESTAN, // 10B39..10B3F
8467 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55
8468 UNKNOWN, // 10B56..10B57
8469 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F
8470 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72
8471 UNKNOWN, // 10B73..10B77
8472 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F
8473 PSALTER_PAHLAVI, // 10B80..10B91
8474 UNKNOWN, // 10B92..10B98
8475 PSALTER_PAHLAVI, // 10B99..10B9C
8476 UNKNOWN, // 10B9D..10BA8
8477 PSALTER_PAHLAVI, // 10BA9..10BAF
8478 UNKNOWN, // 10BB0..10BFF
8479 OLD_TURKIC, // 10C00..10C48
8480 UNKNOWN, // 10C49..10C7F
8481 OLD_HUNGARIAN, // 10C80..10CB2
8482 UNKNOWN, // 10CB3..10CBF
8483 OLD_HUNGARIAN, // 10CC0..10CF2
8484 UNKNOWN, // 10CF3..10CF9
8485 OLD_HUNGARIAN, // 10CFA..10CFF
8486 HANIFI_ROHINGYA, // 10D00..10D27
8487 UNKNOWN, // 10D28..10D2F
8488 HANIFI_ROHINGYA, // 10D30..10D39
8489 UNKNOWN, // 10D3A..10D3F
8490 GARAY, // 10D40..10D65
8491 UNKNOWN, // 10D66..10D68
8492 GARAY, // 10D69..10D85
8493 UNKNOWN, // 10D86..10D8D
8494 GARAY, // 10D8E..10D8F
8495 UNKNOWN, // 10D90..10E5F
8496 ARABIC, // 10E60..10E7E
8497 UNKNOWN, // 10E7F
8498 YEZIDI, // 10E80..10EA9
8499 UNKNOWN, // 10EAA
8500 YEZIDI, // 10EAB..10EAD
8501 UNKNOWN, // 10EAE..10EAF
8502 YEZIDI, // 10EB0..10EB1
8503 UNKNOWN, // 10EB2..10EC1
8504 ARABIC, // 10EC2..10EC7
8505 UNKNOWN, // 10EC8..10ECF
8506 ARABIC, // 10ED0..10ED8
8507 UNKNOWN, // 10ED9..10EF9
8508 ARABIC, // 10EFA..10EFF
8509 OLD_SOGDIAN, // 10F00..10F27
8510 UNKNOWN, // 10F28..10F2F
8511 SOGDIAN, // 10F30..10F59
8512 UNKNOWN, // 10F5A..10F6F
8513 OLD_UYGHUR, // 10F70..10F89
8514 UNKNOWN, // 10F8A..10FAF
8515 CHORASMIAN, // 10FB0..10FCB
8516 UNKNOWN, // 10FCC..10FDF
8517 ELYMAIC, // 10FE0..10FF6
8518 UNKNOWN, // 10FF7..10FFF
8519 BRAHMI, // 11000..1104D
8520 UNKNOWN, // 1104E..11051
8521 BRAHMI, // 11052..11075
8522 UNKNOWN, // 11076..1107E
8523 BRAHMI, // 1107F
8524 KAITHI, // 11080..110C2
8525 UNKNOWN, // 110C3..110CC
8526 KAITHI, // 110CD
8527 UNKNOWN, // 110CE..110CF
8528 SORA_SOMPENG, // 110D0..110E8
8529 UNKNOWN, // 110E9..110EF
8530 SORA_SOMPENG, // 110F0..110F9
8531 UNKNOWN, // 110FA..110FF
8532 CHAKMA, // 11100..11134
8533 UNKNOWN, // 11135
8534 CHAKMA, // 11136..11147
8535 UNKNOWN, // 11148..1114F
8536 MAHAJANI, // 11150..11176
8537 UNKNOWN, // 11177..1117F
8538 SHARADA, // 11180..111DF
8539 UNKNOWN, // 111E0
8540 SINHALA, // 111E1..111F4
8541 UNKNOWN, // 111F5..111FF
8542 KHOJKI, // 11200..11211
8543 UNKNOWN, // 11212
8544 KHOJKI, // 11213..11241
8545 UNKNOWN, // 11242..1127F
8546 MULTANI, // 11280..11286
8547 UNKNOWN, // 11287
8548 MULTANI, // 11288
8549 UNKNOWN, // 11289
8550 MULTANI, // 1128A..1128D
8551 UNKNOWN, // 1128E
8552 MULTANI, // 1128F..1129D
8553 UNKNOWN, // 1129E
8554 MULTANI, // 1129F..112A9
8555 UNKNOWN, // 112AA..112AF
8556 KHUDAWADI, // 112B0..112EA
8557 UNKNOWN, // 112EB..112EF
8558 KHUDAWADI, // 112F0..112F9
8559 UNKNOWN, // 112FA..112FF
8560 GRANTHA, // 11300..11303
8561 UNKNOWN, // 11304
8562 GRANTHA, // 11305..1130C
8563 UNKNOWN, // 1130D..1130E
8564 GRANTHA, // 1130F..11310
8565 UNKNOWN, // 11311..11312
8566 GRANTHA, // 11313..11328
8567 UNKNOWN, // 11329
8568 GRANTHA, // 1132A..11330
8569 UNKNOWN, // 11331
8570 GRANTHA, // 11332..11333
8571 UNKNOWN, // 11334
8572 GRANTHA, // 11335..11339
8573 UNKNOWN, // 1133A
8574 INHERITED, // 1133B
8575 GRANTHA, // 1133C..11344
8576 UNKNOWN, // 11345..11346
8577 GRANTHA, // 11347..11348
8578 UNKNOWN, // 11349..1134A
8579 GRANTHA, // 1134B..1134D
8580 UNKNOWN, // 1134E..1134F
8581 GRANTHA, // 11350
8582 UNKNOWN, // 11351..11356
8583 GRANTHA, // 11357
8584 UNKNOWN, // 11358..1135C
8585 GRANTHA, // 1135D..11363
8586 UNKNOWN, // 11364..11365
8587 GRANTHA, // 11366..1136C
8588 UNKNOWN, // 1136D..1136F
8589 GRANTHA, // 11370..11374
8590 UNKNOWN, // 11375..1137F
8591 TULU_TIGALARI, // 11380..11389
8592 UNKNOWN, // 1138A
8593 TULU_TIGALARI, // 1138B
8594 UNKNOWN, // 1138C..1138D
8595 TULU_TIGALARI, // 1138E
8596 UNKNOWN, // 1138F
8597 TULU_TIGALARI, // 11390..113B5
8598 UNKNOWN, // 113B6
8599 TULU_TIGALARI, // 113B7..113C0
8600 UNKNOWN, // 113C1
8601 TULU_TIGALARI, // 113C2
8602 UNKNOWN, // 113C3..113C4
8603 TULU_TIGALARI, // 113C5
8604 UNKNOWN, // 113C6
8605 TULU_TIGALARI, // 113C7..113CA
8606 UNKNOWN, // 113CB
8607 TULU_TIGALARI, // 113CC..113D5
8608 UNKNOWN, // 113D6
8609 TULU_TIGALARI, // 113D7..113D8
8610 UNKNOWN, // 113D9..113E0
8611 TULU_TIGALARI, // 113E1..113E2
8612 UNKNOWN, // 113E3..113FF
8613 NEWA, // 11400..1145B
8614 UNKNOWN, // 1145C
8615 NEWA, // 1145D..11461
8616 UNKNOWN, // 11462..1147F
8617 TIRHUTA, // 11480..114C7
8618 UNKNOWN, // 114C8..114CF
8619 TIRHUTA, // 114D0..114D9
8620 UNKNOWN, // 114DA..1157F
8621 SIDDHAM, // 11580..115B5
8622 UNKNOWN, // 115B6..115B7
8623 SIDDHAM, // 115B8..115DD
8624 UNKNOWN, // 115DE..115FF
8625 MODI, // 11600..11644
8626 UNKNOWN, // 11645..1164F
8627 MODI, // 11650..11659
8628 UNKNOWN, // 1165A..1165F
8629 MONGOLIAN, // 11660..1166C
8630 UNKNOWN, // 1166D..1167F
8631 TAKRI, // 11680..116B9
8632 UNKNOWN, // 116BA..116BF
8633 TAKRI, // 116C0..116C9
8634 UNKNOWN, // 116CA..116CF
8635 MYANMAR, // 116D0..116E3
8636 UNKNOWN, // 116E4..116FF
8637 AHOM, // 11700..1171A
8638 UNKNOWN, // 1171B..1171C
8639 AHOM, // 1171D..1172B
8640 UNKNOWN, // 1172C..1172F
8641 AHOM, // 11730..11746
8642 UNKNOWN, // 11747..117FF
8643 DOGRA, // 11800..1183B
8644 UNKNOWN, // 1183C..1189F
8645 WARANG_CITI, // 118A0..118F2
8646 UNKNOWN, // 118F3..118FE
8647 WARANG_CITI, // 118FF
8648 DIVES_AKURU, // 11900..11906
8649 UNKNOWN, // 11907..11908
8650 DIVES_AKURU, // 11909
8651 UNKNOWN, // 1190A..1190B
8652 DIVES_AKURU, // 1190C..11913
8653 UNKNOWN, // 11914
8654 DIVES_AKURU, // 11915..11916
8655 UNKNOWN, // 11917
8656 DIVES_AKURU, // 11918..11935
8657 UNKNOWN, // 11936
8658 DIVES_AKURU, // 11937..11938
8659 UNKNOWN, // 11939..1193A
8660 DIVES_AKURU, // 1193B..11946
8661 UNKNOWN, // 11947..1194F
8662 DIVES_AKURU, // 11950..11959
8663 UNKNOWN, // 1195A..1199F
8664 NANDINAGARI, // 119A0..119A7
8665 UNKNOWN, // 119A8..119A9
8666 NANDINAGARI, // 119AA..119D7
8667 UNKNOWN, // 119D8..119D9
8668 NANDINAGARI, // 119DA..119E4
8669 UNKNOWN, // 119E5..119FF
8670 ZANABAZAR_SQUARE, // 11A00..11A47
8671 UNKNOWN, // 11A48..11A4F
8672 SOYOMBO, // 11A50..11AA2
8673 UNKNOWN, // 11AA3..11AAF
8674 CANADIAN_ABORIGINAL, // 11AB0..11ABF
8675 PAU_CIN_HAU, // 11AC0..11AF8
8676 UNKNOWN, // 11AF9..11AFF
8677 DEVANAGARI, // 11B00..11B09
8678 UNKNOWN, // 11B0A..11B5F
8679 SHARADA, // 11B60..11B67
8680 UNKNOWN, // 11B68..11BBF
8681 SUNUWAR, // 11BC0..11BE1
8682 UNKNOWN, // 11BE2..11BEF
8683 SUNUWAR, // 11BF0..11BF9
8684 UNKNOWN, // 11BFA..11BFF
8685 BHAIKSUKI, // 11C00..11C08
8686 UNKNOWN, // 11C09
8687 BHAIKSUKI, // 11C0A..11C36
8688 UNKNOWN, // 11C37
8689 BHAIKSUKI, // 11C38..11C45
8690 UNKNOWN, // 11C46..11C4F
8691 BHAIKSUKI, // 11C50..11C6C
8692 UNKNOWN, // 11C6D..11C6F
8693 MARCHEN, // 11C70..11C8F
8694 UNKNOWN, // 11C90..11C91
8695 MARCHEN, // 11C92..11CA7
8696 UNKNOWN, // 11CA8
8697 MARCHEN, // 11CA9..11CB6
8698 UNKNOWN, // 11CB7..11CFF
8699 MASARAM_GONDI, // 11D00..11D06
8700 UNKNOWN, // 11D07
8701 MASARAM_GONDI, // 11D08..11D09
8702 UNKNOWN, // 11D0A
8703 MASARAM_GONDI, // 11D0B..11D36
8704 UNKNOWN, // 11D37..11D39
8705 MASARAM_GONDI, // 11D3A
8706 UNKNOWN, // 11D3B
8707 MASARAM_GONDI, // 11D3C..11D3D
8708 UNKNOWN, // 11D3E
8709 MASARAM_GONDI, // 11D3F..11D47
8710 UNKNOWN, // 11D48..11D4F
8711 MASARAM_GONDI, // 11D50..11D59
8712 UNKNOWN, // 11D5A..11D5F
8713 GUNJALA_GONDI, // 11D60..11D65
8714 UNKNOWN, // 11D66
8715 GUNJALA_GONDI, // 11D67..11D68
8716 UNKNOWN, // 11D69
8717 GUNJALA_GONDI, // 11D6A..11D8E
8718 UNKNOWN, // 11D8F
8719 GUNJALA_GONDI, // 11D90..11D91
8720 UNKNOWN, // 11D92
8721 GUNJALA_GONDI, // 11D93..11D98
8722 UNKNOWN, // 11D99..11D9F
8723 GUNJALA_GONDI, // 11DA0..11DA9
8724 UNKNOWN, // 11DAA..11DAF
8725 TOLONG_SIKI, // 11DB0..11DDB
8726 UNKNOWN, // 11DDC..11DDF
8727 TOLONG_SIKI, // 11DE0..11DE9
8728 UNKNOWN, // 11DEA..11EDF
8729 MAKASAR, // 11EE0..11EF8
8730 UNKNOWN, // 11EF9..11EFF
8731 KAWI, // 11F00..11F10
8732 UNKNOWN, // 11F11
8733 KAWI, // 11F12..11F3A
8734 UNKNOWN, // 11F3B..11F3D
8735 KAWI, // 11F3E..11F5A
8736 UNKNOWN, // 11F5B..11FAF
8737 LISU, // 11FB0
8738 UNKNOWN, // 11FB1..11FBF
8739 TAMIL, // 11FC0..11FF1
8740 UNKNOWN, // 11FF2..11FFE
8741 TAMIL, // 11FFF
8742 CUNEIFORM, // 12000..12399
8743 UNKNOWN, // 1239A..123FF
8744 CUNEIFORM, // 12400..1246E
8745 UNKNOWN, // 1246F
8746 CUNEIFORM, // 12470..12474
8747 UNKNOWN, // 12475..1247F
8748 CUNEIFORM, // 12480..12543
8749 UNKNOWN, // 12544..12F8F
8750 CYPRO_MINOAN, // 12F90..12FF2
8751 UNKNOWN, // 12FF3..12FFF
8752 EGYPTIAN_HIEROGLYPHS, // 13000..13455
8753 UNKNOWN, // 13456..1345F
8754 EGYPTIAN_HIEROGLYPHS, // 13460..143FA
8755 UNKNOWN, // 143FB..143FF
8756 ANATOLIAN_HIEROGLYPHS, // 14400..14646
8757 UNKNOWN, // 14647..160FF
8758 GURUNG_KHEMA, // 16100..16139
8759 UNKNOWN, // 1613A..167FF
8760 BAMUM, // 16800..16A38
8761 UNKNOWN, // 16A39..16A3F
8762 MRO, // 16A40..16A5E
8763 UNKNOWN, // 16A5F
8764 MRO, // 16A60..16A69
8765 UNKNOWN, // 16A6A..16A6D
8766 MRO, // 16A6E..16A6F
8767 TANGSA, // 16A70..16ABE
8768 UNKNOWN, // 16ABF
8769 TANGSA, // 16AC0..16AC9
8770 UNKNOWN, // 16ACA..16ACF
8771 BASSA_VAH, // 16AD0..16AED
8772 UNKNOWN, // 16AEE..16AEF
8773 BASSA_VAH, // 16AF0..16AF5
8774 UNKNOWN, // 16AF6..16AFF
8775 PAHAWH_HMONG, // 16B00..16B45
8776 UNKNOWN, // 16B46..16B4F
8777 PAHAWH_HMONG, // 16B50..16B59
8778 UNKNOWN, // 16B5A
8779 PAHAWH_HMONG, // 16B5B..16B61
8780 UNKNOWN, // 16B62
8781 PAHAWH_HMONG, // 16B63..16B77
8782 UNKNOWN, // 16B78..16B7C
8783 PAHAWH_HMONG, // 16B7D..16B8F
8784 UNKNOWN, // 16B90..16D3F
8785 KIRAT_RAI, // 16D40..16D79
8786 UNKNOWN, // 16D7A..16E3F
8787 MEDEFAIDRIN, // 16E40..16E9A
8788 UNKNOWN, // 16E9B..16E9F
8789 BERIA_ERFE, // 16EA0..16EB8
8790 UNKNOWN, // 16EB9..16EBA
8791 BERIA_ERFE, // 16EBB..16ED3
8792 UNKNOWN, // 16ED4..16EFF
8793 MIAO, // 16F00..16F4A
8794 UNKNOWN, // 16F4B..16F4E
8795 MIAO, // 16F4F..16F87
8796 UNKNOWN, // 16F88..16F8E
8797 MIAO, // 16F8F..16F9F
8798 UNKNOWN, // 16FA0..16FDF
8799 TANGUT, // 16FE0
8800 NUSHU, // 16FE1
8801 HAN, // 16FE2..16FE3
8802 KHITAN_SMALL_SCRIPT, // 16FE4
8803 UNKNOWN, // 16FE5..16FEF
8804 HAN, // 16FF0..16FF6
8805 UNKNOWN, // 16FF7..16FFF
8806 TANGUT, // 17000..18AFF
8807 KHITAN_SMALL_SCRIPT, // 18B00..18CD5
8808 UNKNOWN, // 18CD6..18CFE
8809 KHITAN_SMALL_SCRIPT, // 18CFF
8810 TANGUT, // 18D00..18D1E
8811 UNKNOWN, // 18D1F..18D7F
8812 TANGUT, // 18D80..18DF2
8813 UNKNOWN, // 18DF3..1AFEF
8814 KATAKANA, // 1AFF0..1AFF3
8815 UNKNOWN, // 1AFF4
8816 KATAKANA, // 1AFF5..1AFFB
8817 UNKNOWN, // 1AFFC
8818 KATAKANA, // 1AFFD..1AFFE
8819 UNKNOWN, // 1AFFF
8820 KATAKANA, // 1B000
8821 HIRAGANA, // 1B001..1B11F
8822 KATAKANA, // 1B120..1B122
8823 UNKNOWN, // 1B123..1B131
8824 HIRAGANA, // 1B132
8825 UNKNOWN, // 1B133..1B14F
8826 HIRAGANA, // 1B150..1B152
8827 UNKNOWN, // 1B153..1B154
8828 KATAKANA, // 1B155
8829 UNKNOWN, // 1B156..1B163
8830 KATAKANA, // 1B164..1B167
8831 UNKNOWN, // 1B168..1B16F
8832 NUSHU, // 1B170..1B2FB
8833 UNKNOWN, // 1B2FC..1BBFF
8834 DUPLOYAN, // 1BC00..1BC6A
8835 UNKNOWN, // 1BC6B..1BC6F
8836 DUPLOYAN, // 1BC70..1BC7C
8837 UNKNOWN, // 1BC7D..1BC7F
8838 DUPLOYAN, // 1BC80..1BC88
8839 UNKNOWN, // 1BC89..1BC8F
8840 DUPLOYAN, // 1BC90..1BC99
8841 UNKNOWN, // 1BC9A..1BC9B
8842 DUPLOYAN, // 1BC9C..1BC9F
8843 COMMON, // 1BCA0..1BCA3
8844 UNKNOWN, // 1BCA4..1CBFF
8845 COMMON, // 1CC00..1CCFC
8846 UNKNOWN, // 1CCFD..1CCFF
8847 COMMON, // 1CD00..1CEB3
8848 UNKNOWN, // 1CEB4..1CEB9
8849 COMMON, // 1CEBA..1CED0
8850 UNKNOWN, // 1CED1..1CEDF
8851 COMMON, // 1CEE0..1CEF0
8852 UNKNOWN, // 1CEF1..1CEFF
8853 INHERITED, // 1CF00..1CF2D
8854 UNKNOWN, // 1CF2E..1CF2F
8855 INHERITED, // 1CF30..1CF46
8856 UNKNOWN, // 1CF47..1CF4F
8857 COMMON, // 1CF50..1CFC3
8858 UNKNOWN, // 1CFC4..1CFFF
8859 COMMON, // 1D000..1D0F5
8860 UNKNOWN, // 1D0F6..1D0FF
8861 COMMON, // 1D100..1D126
8862 UNKNOWN, // 1D127..1D128
8863 COMMON, // 1D129..1D166
8864 INHERITED, // 1D167..1D169
8865 COMMON, // 1D16A..1D17A
8866 INHERITED, // 1D17B..1D182
8867 COMMON, // 1D183..1D184
8868 INHERITED, // 1D185..1D18B
8869 COMMON, // 1D18C..1D1A9
8870 INHERITED, // 1D1AA..1D1AD
8871 COMMON, // 1D1AE..1D1EA
8872 UNKNOWN, // 1D1EB..1D1FF
8873 GREEK, // 1D200..1D245
8874 UNKNOWN, // 1D246..1D2BF
8875 COMMON, // 1D2C0..1D2D3
8876 UNKNOWN, // 1D2D4..1D2DF
8877 COMMON, // 1D2E0..1D2F3
8878 UNKNOWN, // 1D2F4..1D2FF
8879 COMMON, // 1D300..1D356
8880 UNKNOWN, // 1D357..1D35F
8881 COMMON, // 1D360..1D378
8882 UNKNOWN, // 1D379..1D3FF
8883 COMMON, // 1D400..1D454
8884 UNKNOWN, // 1D455
8885 COMMON, // 1D456..1D49C
8886 UNKNOWN, // 1D49D
8887 COMMON, // 1D49E..1D49F
8888 UNKNOWN, // 1D4A0..1D4A1
8889 COMMON, // 1D4A2
8890 UNKNOWN, // 1D4A3..1D4A4
8891 COMMON, // 1D4A5..1D4A6
8892 UNKNOWN, // 1D4A7..1D4A8
8893 COMMON, // 1D4A9..1D4AC
8894 UNKNOWN, // 1D4AD
8895 COMMON, // 1D4AE..1D4B9
8896 UNKNOWN, // 1D4BA
8897 COMMON, // 1D4BB
8898 UNKNOWN, // 1D4BC
8899 COMMON, // 1D4BD..1D4C3
8900 UNKNOWN, // 1D4C4
8901 COMMON, // 1D4C5..1D505
8902 UNKNOWN, // 1D506
8903 COMMON, // 1D507..1D50A
8904 UNKNOWN, // 1D50B..1D50C
8905 COMMON, // 1D50D..1D514
8906 UNKNOWN, // 1D515
8907 COMMON, // 1D516..1D51C
8908 UNKNOWN, // 1D51D
8909 COMMON, // 1D51E..1D539
8910 UNKNOWN, // 1D53A
8911 COMMON, // 1D53B..1D53E
8912 UNKNOWN, // 1D53F
8913 COMMON, // 1D540..1D544
8914 UNKNOWN, // 1D545
8915 COMMON, // 1D546
8916 UNKNOWN, // 1D547..1D549
8917 COMMON, // 1D54A..1D550
8918 UNKNOWN, // 1D551
8919 COMMON, // 1D552..1D6A5
8920 UNKNOWN, // 1D6A6..1D6A7
8921 COMMON, // 1D6A8..1D7CB
8922 UNKNOWN, // 1D7CC..1D7CD
8923 COMMON, // 1D7CE..1D7FF
8924 SIGNWRITING, // 1D800..1DA8B
8925 UNKNOWN, // 1DA8C..1DA9A
8926 SIGNWRITING, // 1DA9B..1DA9F
8927 UNKNOWN, // 1DAA0
8928 SIGNWRITING, // 1DAA1..1DAAF
8929 UNKNOWN, // 1DAB0..1DEFF
8930 LATIN, // 1DF00..1DF1E
8931 UNKNOWN, // 1DF1F..1DF24
8932 LATIN, // 1DF25..1DF2A
8933 UNKNOWN, // 1DF2B..1DFFF
8934 GLAGOLITIC, // 1E000..1E006
8935 UNKNOWN, // 1E007
8936 GLAGOLITIC, // 1E008..1E018
8937 UNKNOWN, // 1E019..1E01A
8938 GLAGOLITIC, // 1E01B..1E021
8939 UNKNOWN, // 1E022
8940 GLAGOLITIC, // 1E023..1E024
8941 UNKNOWN, // 1E025
8942 GLAGOLITIC, // 1E026..1E02A
8943 UNKNOWN, // 1E02B..1E02F
8944 CYRILLIC, // 1E030..1E06D
8945 UNKNOWN, // 1E06E..1E08E
8946 CYRILLIC, // 1E08F
8947 UNKNOWN, // 1E090..1E0FF
8948 NYIAKENG_PUACHUE_HMONG, // 1E100..1E12C
8949 UNKNOWN, // 1E12D..1E12F
8950 NYIAKENG_PUACHUE_HMONG, // 1E130..1E13D
8951 UNKNOWN, // 1E13E..1E13F
8952 NYIAKENG_PUACHUE_HMONG, // 1E140..1E149
8953 UNKNOWN, // 1E14A..1E14D
8954 NYIAKENG_PUACHUE_HMONG, // 1E14E..1E14F
8955 UNKNOWN, // 1E150..1E28F
8956 TOTO, // 1E290..1E2AE
8957 UNKNOWN, // 1E2AF..1E2BF
8958 WANCHO, // 1E2C0..1E2F9
8959 UNKNOWN, // 1E2FA..1E2FE
8960 WANCHO, // 1E2FF
8961 UNKNOWN, // 1E300..1E4CF
8962 NAG_MUNDARI, // 1E4D0..1E4F9
8963 UNKNOWN, // 1E4FA..1E5CF
8964 OL_ONAL, // 1E5D0..1E5FA
8965 UNKNOWN, // 1E5FB..1E5FE
8966 OL_ONAL, // 1E5FF
8967 UNKNOWN, // 1E600..1E6BF
8968 TAI_YO, // 1E6C0..1E6DE
8969 UNKNOWN, // 1E6DF
8970 TAI_YO, // 1E6E0..1E6F5
8971 UNKNOWN, // 1E6F6..1E6FD
8972 TAI_YO, // 1E6FE..1E6FF
8973 UNKNOWN, // 1E700..1E7DF
8974 ETHIOPIC, // 1E7E0..1E7E6
8975 UNKNOWN, // 1E7E7
8976 ETHIOPIC, // 1E7E8..1E7EB
8977 UNKNOWN, // 1E7EC
8978 ETHIOPIC, // 1E7ED..1E7EE
8979 UNKNOWN, // 1E7EF
8980 ETHIOPIC, // 1E7F0..1E7FE
8981 UNKNOWN, // 1E7FF
8982 MENDE_KIKAKUI, // 1E800..1E8C4
8983 UNKNOWN, // 1E8C5..1E8C6
8984 MENDE_KIKAKUI, // 1E8C7..1E8D6
8985 UNKNOWN, // 1E8D7..1E8FF
8986 ADLAM, // 1E900..1E94B
8987 UNKNOWN, // 1E94C..1E94F
8988 ADLAM, // 1E950..1E959
8989 UNKNOWN, // 1E95A..1E95D
8990 ADLAM, // 1E95E..1E95F
8991 UNKNOWN, // 1E960..1EC70
8992 COMMON, // 1EC71..1ECB4
8993 UNKNOWN, // 1ECB5..1ED00
8994 COMMON, // 1ED01..1ED3D
8995 UNKNOWN, // 1ED3E..1EDFF
8996 ARABIC, // 1EE00..1EE03
8997 UNKNOWN, // 1EE04
8998 ARABIC, // 1EE05..1EE1F
8999 UNKNOWN, // 1EE20
9000 ARABIC, // 1EE21..1EE22
9001 UNKNOWN, // 1EE23
9002 ARABIC, // 1EE24
9003 UNKNOWN, // 1EE25..1EE26
9004 ARABIC, // 1EE27
9005 UNKNOWN, // 1EE28
9006 ARABIC, // 1EE29..1EE32
9007 UNKNOWN, // 1EE33
9008 ARABIC, // 1EE34..1EE37
9009 UNKNOWN, // 1EE38
9010 ARABIC, // 1EE39
9011 UNKNOWN, // 1EE3A
9012 ARABIC, // 1EE3B
9013 UNKNOWN, // 1EE3C..1EE41
9014 ARABIC, // 1EE42
9015 UNKNOWN, // 1EE43..1EE46
9016 ARABIC, // 1EE47
9017 UNKNOWN, // 1EE48
9018 ARABIC, // 1EE49
9019 UNKNOWN, // 1EE4A
9020 ARABIC, // 1EE4B
9021 UNKNOWN, // 1EE4C
9022 ARABIC, // 1EE4D..1EE4F
9023 UNKNOWN, // 1EE50
9024 ARABIC, // 1EE51..1EE52
9025 UNKNOWN, // 1EE53
9026 ARABIC, // 1EE54
9027 UNKNOWN, // 1EE55..1EE56
9028 ARABIC, // 1EE57
9029 UNKNOWN, // 1EE58
9030 ARABIC, // 1EE59
9031 UNKNOWN, // 1EE5A
9032 ARABIC, // 1EE5B
9033 UNKNOWN, // 1EE5C
9034 ARABIC, // 1EE5D
9035 UNKNOWN, // 1EE5E
9036 ARABIC, // 1EE5F
9037 UNKNOWN, // 1EE60
9038 ARABIC, // 1EE61..1EE62
9039 UNKNOWN, // 1EE63
9040 ARABIC, // 1EE64
9041 UNKNOWN, // 1EE65..1EE66
9042 ARABIC, // 1EE67..1EE6A
9043 UNKNOWN, // 1EE6B
9044 ARABIC, // 1EE6C..1EE72
9045 UNKNOWN, // 1EE73
9046 ARABIC, // 1EE74..1EE77
9047 UNKNOWN, // 1EE78
9048 ARABIC, // 1EE79..1EE7C
9049 UNKNOWN, // 1EE7D
9050 ARABIC, // 1EE7E
9051 UNKNOWN, // 1EE7F
9052 ARABIC, // 1EE80..1EE89
9053 UNKNOWN, // 1EE8A
9054 ARABIC, // 1EE8B..1EE9B
9055 UNKNOWN, // 1EE9C..1EEA0
9056 ARABIC, // 1EEA1..1EEA3
9057 UNKNOWN, // 1EEA4
9058 ARABIC, // 1EEA5..1EEA9
9059 UNKNOWN, // 1EEAA
9060 ARABIC, // 1EEAB..1EEBB
9061 UNKNOWN, // 1EEBC..1EEEF
9062 ARABIC, // 1EEF0..1EEF1
9063 UNKNOWN, // 1EEF2..1EFFF
9064 COMMON, // 1F000..1F02B
9065 UNKNOWN, // 1F02C..1F02F
9066 COMMON, // 1F030..1F093
9067 UNKNOWN, // 1F094..1F09F
9068 COMMON, // 1F0A0..1F0AE
9069 UNKNOWN, // 1F0AF..1F0B0
9070 COMMON, // 1F0B1..1F0BF
9071 UNKNOWN, // 1F0C0
9072 COMMON, // 1F0C1..1F0CF
9073 UNKNOWN, // 1F0D0
9074 COMMON, // 1F0D1..1F0F5
9075 UNKNOWN, // 1F0F6..1F0FF
9076 COMMON, // 1F100..1F1AD
9077 UNKNOWN, // 1F1AE..1F1E5
9078 COMMON, // 1F1E6..1F1FF
9079 HIRAGANA, // 1F200
9080 COMMON, // 1F201..1F202
9081 UNKNOWN, // 1F203..1F20F
9082 COMMON, // 1F210..1F23B
9083 UNKNOWN, // 1F23C..1F23F
9084 COMMON, // 1F240..1F248
9085 UNKNOWN, // 1F249..1F24F
9086 COMMON, // 1F250..1F251
9087 UNKNOWN, // 1F252..1F25F
9088 COMMON, // 1F260..1F265
9089 UNKNOWN, // 1F266..1F2FF
9090 COMMON, // 1F300..1F6D8
9091 UNKNOWN, // 1F6D9..1F6DB
9092 COMMON, // 1F6DC..1F6EC
9093 UNKNOWN, // 1F6ED..1F6EF
9094 COMMON, // 1F6F0..1F6FC
9095 UNKNOWN, // 1F6FD..1F6FF
9096 COMMON, // 1F700..1F7D9
9097 UNKNOWN, // 1F7DA..1F7DF
9098 COMMON, // 1F7E0..1F7EB
9099 UNKNOWN, // 1F7EC..1F7EF
9100 COMMON, // 1F7F0
9101 UNKNOWN, // 1F7F1..1F7FF
9102 COMMON, // 1F800..1F80B
9103 UNKNOWN, // 1F80C..1F80F
9104 COMMON, // 1F810..1F847
9105 UNKNOWN, // 1F848..1F84F
9106 COMMON, // 1F850..1F859
9107 UNKNOWN, // 1F85A..1F85F
9108 COMMON, // 1F860..1F887
9109 UNKNOWN, // 1F888..1F88F
9110 COMMON, // 1F890..1F8AD
9111 UNKNOWN, // 1F8AE..1F8AF
9112 COMMON, // 1F8B0..1F8BB
9113 UNKNOWN, // 1F8BC..1F8BF
9114 COMMON, // 1F8C0..1F8C1
9115 UNKNOWN, // 1F8C2..1F8CF
9116 COMMON, // 1F8D0..1F8D8
9117 UNKNOWN, // 1F8D9..1F8FF
9118 COMMON, // 1F900..1FA57
9119 UNKNOWN, // 1FA58..1FA5F
9120 COMMON, // 1FA60..1FA6D
9121 UNKNOWN, // 1FA6E..1FA6F
9122 COMMON, // 1FA70..1FA7C
9123 UNKNOWN, // 1FA7D..1FA7F
9124 COMMON, // 1FA80..1FA8A
9125 UNKNOWN, // 1FA8B..1FA8D
9126 COMMON, // 1FA8E..1FAC6
9127 UNKNOWN, // 1FAC7
9128 COMMON, // 1FAC8
9129 UNKNOWN, // 1FAC9..1FACC
9130 COMMON, // 1FACD..1FADC
9131 UNKNOWN, // 1FADD..1FADE
9132 COMMON, // 1FADF..1FAEA
9133 UNKNOWN, // 1FAEB..1FAEE
9134 COMMON, // 1FAEF..1FAF8
9135 UNKNOWN, // 1FAF9..1FAFF
9136 COMMON, // 1FB00..1FB92
9137 UNKNOWN, // 1FB93
9138 COMMON, // 1FB94..1FBFA
9139 UNKNOWN, // 1FBFB..1FFFF
9140 HAN, // 20000..2A6DF
9141 UNKNOWN, // 2A6E0..2A6FF
9142 HAN, // 2A700..2B81D
9143 UNKNOWN, // 2B81E..2B81F
9144 HAN, // 2B820..2CEAD
9145 UNKNOWN, // 2CEAE..2CEAF
9146 HAN, // 2CEB0..2EBE0
9147 UNKNOWN, // 2EBE1..2EBEF
9148 HAN, // 2EBF0..2EE5D
9149 UNKNOWN, // 2EE5E..2F7FF
9150 HAN, // 2F800..2FA1D
9151 UNKNOWN, // 2FA1E..2FFFF
9152 HAN, // 30000..3134A
9153 UNKNOWN, // 3134B..3134F
9154 HAN, // 31350..33479
9155 UNKNOWN, // 3347A..E0000
9156 COMMON, // E0001
9157 UNKNOWN, // E0002..E001F
9158 COMMON, // E0020..E007F
9159 UNKNOWN, // E0080..E00FF
9160 INHERITED, // E0100..E01EF
9161 UNKNOWN, // E01F0..10FFFF
9162 };
9163
9164 private static final HashMap<String, Character.UnicodeScript> aliases;
9165 static {
9166 aliases = HashMap.newHashMap(UNKNOWN.ordinal() + 1);
9167 aliases.put("ADLM", ADLAM);
9168 aliases.put("AGHB", CAUCASIAN_ALBANIAN);
9169 aliases.put("AHOM", AHOM);
9170 aliases.put("ARAB", ARABIC);
9171 aliases.put("ARMI", IMPERIAL_ARAMAIC);
9172 aliases.put("ARMN", ARMENIAN);
9173 aliases.put("AVST", AVESTAN);
9174 aliases.put("BALI", BALINESE);
9175 aliases.put("BAMU", BAMUM);
9176 aliases.put("BASS", BASSA_VAH);
9177 aliases.put("BATK", BATAK);
9178 aliases.put("BENG", BENGALI);
9179 aliases.put("BERF", BERIA_ERFE);
9180 aliases.put("BHKS", BHAIKSUKI);
9181 aliases.put("BOPO", BOPOMOFO);
9182 aliases.put("BRAH", BRAHMI);
9183 aliases.put("BRAI", BRAILLE);
9184 aliases.put("BUGI", BUGINESE);
9185 aliases.put("BUHD", BUHID);
9186 aliases.put("CAKM", CHAKMA);
9187 aliases.put("CANS", CANADIAN_ABORIGINAL);
9188 aliases.put("CARI", CARIAN);
9189 aliases.put("CHAM", CHAM);
9190 aliases.put("CHER", CHEROKEE);
9191 aliases.put("CHRS", CHORASMIAN);
9192 aliases.put("COPT", COPTIC);
9193 aliases.put("CPMN", CYPRO_MINOAN);
9194 aliases.put("CPRT", CYPRIOT);
9195 aliases.put("CYRL", CYRILLIC);
9196 aliases.put("DEVA", DEVANAGARI);
9197 aliases.put("DIAK", DIVES_AKURU);
9198 aliases.put("DOGR", DOGRA);
9199 aliases.put("DSRT", DESERET);
9200 aliases.put("DUPL", DUPLOYAN);
9201 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
9202 aliases.put("ELBA", ELBASAN);
9203 aliases.put("ELYM", ELYMAIC);
9204 aliases.put("ETHI", ETHIOPIC);
9205 aliases.put("GARA", GARAY);
9206 aliases.put("GEOR", GEORGIAN);
9207 aliases.put("GLAG", GLAGOLITIC);
9208 aliases.put("GONG", GUNJALA_GONDI);
9209 aliases.put("GONM", MASARAM_GONDI);
9210 aliases.put("GOTH", GOTHIC);
9211 aliases.put("GRAN", GRANTHA);
9212 aliases.put("GREK", GREEK);
9213 aliases.put("GUJR", GUJARATI);
9214 aliases.put("GUKH", GURUNG_KHEMA);
9215 aliases.put("GURU", GURMUKHI);
9216 aliases.put("HANG", HANGUL);
9217 aliases.put("HANI", HAN);
9218 aliases.put("HANO", HANUNOO);
9219 aliases.put("HATR", HATRAN);
9220 aliases.put("HEBR", HEBREW);
9221 aliases.put("HIRA", HIRAGANA);
9222 aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS);
9223 aliases.put("HMNG", PAHAWH_HMONG);
9224 aliases.put("HMNP", NYIAKENG_PUACHUE_HMONG);
9225 aliases.put("HUNG", OLD_HUNGARIAN);
9226 aliases.put("ITAL", OLD_ITALIC);
9227 aliases.put("JAVA", JAVANESE);
9228 aliases.put("KALI", KAYAH_LI);
9229 aliases.put("KANA", KATAKANA);
9230 aliases.put("KAWI", KAWI);
9231 aliases.put("KHAR", KHAROSHTHI);
9232 aliases.put("KHMR", KHMER);
9233 aliases.put("KHOJ", KHOJKI);
9234 aliases.put("KITS", KHITAN_SMALL_SCRIPT);
9235 aliases.put("KNDA", KANNADA);
9236 aliases.put("KRAI", KIRAT_RAI);
9237 aliases.put("KTHI", KAITHI);
9238 aliases.put("LANA", TAI_THAM);
9239 aliases.put("LAOO", LAO);
9240 aliases.put("LATN", LATIN);
9241 aliases.put("LEPC", LEPCHA);
9242 aliases.put("LIMB", LIMBU);
9243 aliases.put("LINA", LINEAR_A);
9244 aliases.put("LINB", LINEAR_B);
9245 aliases.put("LISU", LISU);
9246 aliases.put("LYCI", LYCIAN);
9247 aliases.put("LYDI", LYDIAN);
9248 aliases.put("MAHJ", MAHAJANI);
9249 aliases.put("MAKA", MAKASAR);
9250 aliases.put("MAND", MANDAIC);
9251 aliases.put("MANI", MANICHAEAN);
9252 aliases.put("MARC", MARCHEN);
9253 aliases.put("MEDF", MEDEFAIDRIN);
9254 aliases.put("MEND", MENDE_KIKAKUI);
9255 aliases.put("MERC", MEROITIC_CURSIVE);
9256 aliases.put("MERO", MEROITIC_HIEROGLYPHS);
9257 aliases.put("MLYM", MALAYALAM);
9258 aliases.put("MODI", MODI);
9259 aliases.put("MONG", MONGOLIAN);
9260 aliases.put("MROO", MRO);
9261 aliases.put("MTEI", MEETEI_MAYEK);
9262 aliases.put("MULT", MULTANI);
9263 aliases.put("MYMR", MYANMAR);
9264 aliases.put("NAGM", NAG_MUNDARI);
9265 aliases.put("NAND", NANDINAGARI);
9266 aliases.put("NARB", OLD_NORTH_ARABIAN);
9267 aliases.put("NBAT", NABATAEAN);
9268 aliases.put("NEWA", NEWA);
9269 aliases.put("NKOO", NKO);
9270 aliases.put("NSHU", NUSHU);
9271 aliases.put("OGAM", OGHAM);
9272 aliases.put("OLCK", OL_CHIKI);
9273 aliases.put("ONAO", OL_ONAL);
9274 aliases.put("ORKH", OLD_TURKIC);
9275 aliases.put("ORYA", ORIYA);
9276 aliases.put("OSGE", OSAGE);
9277 aliases.put("OSMA", OSMANYA);
9278 aliases.put("OUGR", OLD_UYGHUR);
9279 aliases.put("PALM", PALMYRENE);
9280 aliases.put("PAUC", PAU_CIN_HAU);
9281 aliases.put("PERM", OLD_PERMIC);
9282 aliases.put("PHAG", PHAGS_PA);
9283 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
9284 aliases.put("PHLP", PSALTER_PAHLAVI);
9285 aliases.put("PHNX", PHOENICIAN);
9286 aliases.put("PLRD", MIAO);
9287 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
9288 aliases.put("RJNG", REJANG);
9289 aliases.put("ROHG", HANIFI_ROHINGYA);
9290 aliases.put("RUNR", RUNIC);
9291 aliases.put("SAMR", SAMARITAN);
9292 aliases.put("SARB", OLD_SOUTH_ARABIAN);
9293 aliases.put("SAUR", SAURASHTRA);
9294 aliases.put("SGNW", SIGNWRITING);
9295 aliases.put("SHAW", SHAVIAN);
9296 aliases.put("SHRD", SHARADA);
9297 aliases.put("SIDD", SIDDHAM);
9298 aliases.put("SIDT", SIDETIC);
9299 aliases.put("SIND", KHUDAWADI);
9300 aliases.put("SINH", SINHALA);
9301 aliases.put("SOGD", SOGDIAN);
9302 aliases.put("SOGO", OLD_SOGDIAN);
9303 aliases.put("SORA", SORA_SOMPENG);
9304 aliases.put("SOYO", SOYOMBO);
9305 aliases.put("SUND", SUNDANESE);
9306 aliases.put("SUNU", SUNUWAR);
9307 aliases.put("SYLO", SYLOTI_NAGRI);
9308 aliases.put("SYRC", SYRIAC);
9309 aliases.put("TAGB", TAGBANWA);
9310 aliases.put("TAKR", TAKRI);
9311 aliases.put("TALE", TAI_LE);
9312 aliases.put("TALU", NEW_TAI_LUE);
9313 aliases.put("TAML", TAMIL);
9314 aliases.put("TANG", TANGUT);
9315 aliases.put("TAVT", TAI_VIET);
9316 aliases.put("TAYO", TAI_YO);
9317 aliases.put("TELU", TELUGU);
9318 aliases.put("TFNG", TIFINAGH);
9319 aliases.put("TGLG", TAGALOG);
9320 aliases.put("THAA", THAANA);
9321 aliases.put("THAI", THAI);
9322 aliases.put("TIBT", TIBETAN);
9323 aliases.put("TIRH", TIRHUTA);
9324 aliases.put("TNSA", TANGSA);
9325 aliases.put("TODR", TODHRI);
9326 aliases.put("TOLS", TOLONG_SIKI);
9327 aliases.put("TOTO", TOTO);
9328 aliases.put("TUTG", TULU_TIGALARI);
9329 aliases.put("UGAR", UGARITIC);
9330 aliases.put("VAII", VAI);
9331 aliases.put("VITH", VITHKUQI);
9332 aliases.put("WARA", WARANG_CITI);
9333 aliases.put("WCHO", WANCHO);
9334 aliases.put("XPEO", OLD_PERSIAN);
9335 aliases.put("XSUX", CUNEIFORM);
9336 aliases.put("YEZI", YEZIDI);
9337 aliases.put("YIII", YI);
9338 aliases.put("ZANB", ZANABAZAR_SQUARE);
9339 aliases.put("ZINH", INHERITED);
9340 aliases.put("ZYYY", COMMON);
9341 aliases.put("ZZZZ", UNKNOWN);
9342 }
9343
9344 /**
9345 * Returns the enum constant representing the Unicode script of which
9346 * the given character (Unicode code point) is assigned to.
9347 *
9348 * @param codePoint the character (Unicode code point) in question.
9349 * @return The {@code UnicodeScript} constant representing the
9350 * Unicode script of which this character is assigned to.
9351 *
9352 * @throws IllegalArgumentException if the specified
9353 * {@code codePoint} is an invalid Unicode code point.
9354 * @see Character#isValidCodePoint(int)
9355 *
9356 */
9357 public static UnicodeScript of(int codePoint) {
9358 if (!isValidCodePoint(codePoint))
9359 throw new IllegalArgumentException(
9360 String.format("Not a valid Unicode code point: 0x%X", codePoint));
9361 int type = getType(codePoint);
9362 // leave SURROGATE and PRIVATE_USE for table lookup
9363 if (type == UNASSIGNED)
9364 return UNKNOWN;
9365 int index = Arrays.binarySearch(scriptStarts, codePoint);
9366 if (index < 0)
9367 index = -index - 2;
9368 return scripts[index];
9369 }
9370
9371 /**
9372 * Returns the UnicodeScript constant with the given Unicode script
9373 * name or the script name alias. Script names and their aliases are
9374 * determined by The Unicode Standard. The files {@code Scripts.txt}
9375 * and {@code PropertyValueAliases.txt} define script names
9376 * and the script name aliases for a particular version of the
9377 * standard. The {@link Character} class specifies the version of
9378 * the standard that it supports.
9379 * <p>
9380 * Character case is ignored for all of the valid script names.
9381 * The en_US locale's case mapping rules are used to provide
9382 * case-insensitive string comparisons for script name validation.
9383 *
9384 * @param scriptName A {@code UnicodeScript} name.
9385 * @return The {@code UnicodeScript} constant identified
9386 * by {@code scriptName}
9387 * @throws IllegalArgumentException if {@code scriptName} is an
9388 * invalid name
9389 * @throws NullPointerException if {@code scriptName} is null
9390 */
9391 public static final UnicodeScript forName(String scriptName) {
9392 scriptName = scriptName.toUpperCase(Locale.ENGLISH);
9393 //.replace(' ', '_'));
9394 UnicodeScript sc = aliases.get(scriptName);
9395 if (sc != null)
9396 return sc;
9397 return valueOf(scriptName);
9398 }
9399 }
9400
/**
 * The primitive {@code char} wrapped by this {@code Character}.
 *
 * @serial
 */
private final char value;

/** Serial version UID carried over from JDK 1.0.2 for interoperability. */
@java.io.Serial
private static final long serialVersionUID = 3786198910865385080L;

/**
 * Creates a newly allocated {@code Character} object wrapping the
 * specified {@code char} value.
 *
 * @param  value   the value to be represented by the
 *                  {@code Character} object.
 *
 * @deprecated
 * This constructor is rarely the appropriate choice. Prefer the static
 * factory {@link #valueOf(char)}, which is likely to yield
 * significantly better space and time performance.
 */
@Deprecated(since="9")
public Character(char value) {
    this.value = value;
}
9428
9429 @AOTSafeClassInitializer
9430 private static final class CharacterCache {
9431 private CharacterCache(){}
9432
9433 @Stable
9434 static final Character[] cache;
9435 static Character[] archivedCache;
9436
9437 static {
9438 int size = 127 + 1;
9439
9440 // Load and use the archived cache if it exists
9441 CDS.initializeFromArchive(CharacterCache.class);
9442 if (archivedCache == null) {
9443 Character[] c = new Character[size];
9444 for (int i = 0; i < size; i++) {
9445 c[i] = new Character((char) i);
9446 }
9447 archivedCache = c;
9448 }
9449 cache = archivedCache;
9450 assert cache.length == size;
9451 }
9452 }
9453
9454 /**
9455 * Returns a {@code Character} instance representing the specified
9456 * {@code char} value.
9457 * <div class="preview-block">
9458 * <div class="preview-comment">
9459 * <p>
9460 * - When preview features are NOT enabled, {@code Character} is an identity class.
9461 * If a new {@code Character} instance is not required, this method
9462 * should generally be used in preference to the constructor
9463 * {@link #Character(char)}, as this method is likely to yield
9464 * significantly better space and time performance by caching
9465 * frequently requested values.
9466 * This method will always cache values in the range {@code
9467 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
9468 * cache other values outside of this range.
9469 * </p>
9470 * <p>
9471 * - When preview features are enabled, {@code Character} is a {@linkplain Class#isValue value class}.
9472 * The {@code valueOf} behavior is the same as invoking the constructor,
9473 * whether cached or not.
9474 * </p>
9475 * </div>
9476 * </div>
9477 *
9478 * @param c a char value.
9479 * @return a {@code Character} instance representing {@code c}.
9480 * @since 1.5
9481 */
9482 @IntrinsicCandidate
9483 @DeserializeConstructor
9484 public static Character valueOf(char c) {
9485 if (!PreviewFeatures.isEnabled()) {
9486 if (c <= 127) { // must cache
9487 return CharacterCache.cache[(int) c];
9488 }
9489 }
9490 return new Character(c);
9491 }
9492
9493 /**
9494 * Returns the value of this {@code Character} object.
9495 * @return the primitive {@code char} value represented by
9496 * this object.
9497 */
9498 @IntrinsicCandidate
9499 public char charValue() {
9500 return value;
9501 }
9502
9503 /**
9504 * Returns a hash code for this {@code Character}; equal to the result
9505 * of invoking {@code charValue()}.
9506 *
9507 * @return a hash code value for this {@code Character}
9508 */
9509 @Override
9510 public int hashCode() {
9511 return Character.hashCode(value);
9512 }
9513
9514 /**
9515 * Returns a hash code for a {@code char} value; compatible with
9516 * {@code Character.hashCode()}.
9517 *
9518 * @since 1.8
9519 *
9520 * @param value The {@code char} for which to return a hash code.
9521 * @return a hash code value for a {@code char} value.
9522 */
9523 public static int hashCode(char value) {
9524 return (int)value;
9525 }
9526
9527 /**
9528 * Compares this object against the specified object.
9529 * The result is {@code true} if and only if the argument is not
9530 * {@code null} and is a {@code Character} object that
9531 * represents the same {@code char} value as this object.
9532 *
9533 * @param obj the object to compare with.
9534 * @return {@code true} if the objects are the same;
9535 * {@code false} otherwise.
9536 */
9537 public boolean equals(Object obj) {
9538 if (obj instanceof Character c) {
9539 return value == c.charValue();
9540 }
9541 return false;
9542 }
9543
9544 /**
9545 * Returns a {@code String} object representing this
9546 * {@code Character}'s value. The result is a string of
9547 * length 1 whose sole component is the primitive
9548 * {@code char} value represented by this
9549 * {@code Character} object.
9550 *
9551 * @return a string representation of this object.
9552 */
9553 @Override
9554 public String toString() {
9555 return String.valueOf(value);
9556 }
9557
9558 /**
9559 * Returns a {@code String} object representing the
9560 * specified {@code char}. The result is a string of length
9561 * 1 consisting solely of the specified {@code char}.
9562 *
9563 * @apiNote This method cannot handle <a
9564 * href="#supplementary"> supplementary characters</a>. To support
9565 * all Unicode characters, including supplementary characters, use
9566 * the {@link #toString(int)} method.
9567 *
9568 * @param c the {@code char} to be converted
9569 * @return the string representation of the specified {@code char}
9570 * @since 1.4
9571 */
9572 public static String toString(char c) {
9573 return String.valueOf(c);
9574 }
9575
9576 /**
9577 * Returns a {@code String} object representing the
9578 * specified character (Unicode code point). The result is a string of
9579 * length 1 or 2, consisting solely of the specified {@code codePoint}.
9580 *
9581 * @param codePoint the {@code codePoint} to be converted
9582 * @return the string representation of the specified {@code codePoint}
9583 * @throws IllegalArgumentException if the specified
9584 * {@code codePoint} is not a {@linkplain #isValidCodePoint
9585 * valid Unicode code point}.
9586 * @since 11
9587 */
9588 public static String toString(int codePoint) {
9589 return String.valueOfCodePoint(codePoint);
9590 }
9591
9592 /**
9593 * Determines whether the specified code point is a valid
9594 * <a href="http://www.unicode.org/glossary/#code_point">
9595 * Unicode code point value</a>.
9596 *
9597 * @param codePoint the Unicode code point to be tested
9598 * @return {@code true} if the specified code point value is between
9599 * {@link #MIN_CODE_POINT} and
9600 * {@link #MAX_CODE_POINT} inclusive;
9601 * {@code false} otherwise.
9602 * @since 1.5
9603 */
9604 public static boolean isValidCodePoint(int codePoint) {
9605 // Optimized form of:
9606 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
9607 int plane = codePoint >>> 16;
9608 return plane < ((MAX_CODE_POINT + 1) >>> 16);
9609 }
9610
9611 /**
9612 * Determines whether the specified character (Unicode code point)
9613 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
9614 * Such code points can be represented using a single {@code char}.
9615 *
9616 * @param codePoint the character (Unicode code point) to be tested
9617 * @return {@code true} if the specified code point is between
9618 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
9619 * {@code false} otherwise.
9620 * @since 1.7
9621 */
9622 public static boolean isBmpCodePoint(int codePoint) {
9623 return codePoint >>> 16 == 0;
9624 // Optimized form of:
9625 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
9626 // We consistently use logical shift (>>>) to facilitate
9627 // additional runtime optimizations.
9628 }
9629
9630 /**
9631 * Determines whether the specified character (Unicode code point)
9632 * is in the <a href="#supplementary">supplementary character</a> range.
9633 *
9634 * @param codePoint the character (Unicode code point) to be tested
9635 * @return {@code true} if the specified code point is between
9636 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
9637 * {@link #MAX_CODE_POINT} inclusive;
9638 * {@code false} otherwise.
9639 * @since 1.5
9640 */
9641 public static boolean isSupplementaryCodePoint(int codePoint) {
9642 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
9643 && codePoint < MAX_CODE_POINT + 1;
9644 }
9645
9646 /**
9647 * Determines if the given {@code char} value is a
9648 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
9649 * Unicode high-surrogate code unit</a>
9650 * (also known as <i>leading-surrogate code unit</i>).
9651 *
9652 * <p>Such values do not represent characters by themselves,
9653 * but are used in the representation of
9654 * <a href="#supplementary">supplementary characters</a>
9655 * in the UTF-16 encoding.
9656 *
9657 * @param ch the {@code char} value to be tested.
9658 * @return {@code true} if the {@code char} value is between
9659 * {@link #MIN_HIGH_SURROGATE} and
9660 * {@link #MAX_HIGH_SURROGATE} inclusive;
9661 * {@code false} otherwise.
9662 * @see Character#isLowSurrogate(char)
9663 * @see Character.UnicodeBlock#of(int)
9664 * @since 1.5
9665 */
9666 public static boolean isHighSurrogate(char ch) {
9667 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
9668 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
9669 }
9670
9671 /**
9672 * Determines if the given {@code char} value is a
9673 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
9674 * Unicode low-surrogate code unit</a>
9675 * (also known as <i>trailing-surrogate code unit</i>).
9676 *
9677 * <p>Such values do not represent characters by themselves,
9678 * but are used in the representation of
9679 * <a href="#supplementary">supplementary characters</a>
9680 * in the UTF-16 encoding.
9681 *
9682 * @param ch the {@code char} value to be tested.
9683 * @return {@code true} if the {@code char} value is between
9684 * {@link #MIN_LOW_SURROGATE} and
9685 * {@link #MAX_LOW_SURROGATE} inclusive;
9686 * {@code false} otherwise.
9687 * @see Character#isHighSurrogate(char)
9688 * @since 1.5
9689 */
9690 public static boolean isLowSurrogate(char ch) {
9691 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
9692 }
9693
9694 /**
9695 * Determines if the given {@code char} value is a Unicode
9696 * <i>surrogate code unit</i>.
9697 *
9698 * <p>Such values do not represent characters by themselves,
9699 * but are used in the representation of
9700 * <a href="#supplementary">supplementary characters</a>
9701 * in the UTF-16 encoding.
9702 *
9703 * <p>A char value is a surrogate code unit if and only if it is either
9704 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
9705 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
9706 *
9707 * @param ch the {@code char} value to be tested.
9708 * @return {@code true} if the {@code char} value is between
9709 * {@link #MIN_SURROGATE} and
9710 * {@link #MAX_SURROGATE} inclusive;
9711 * {@code false} otherwise.
9712 * @since 1.7
9713 */
9714 public static boolean isSurrogate(char ch) {
9715 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
9716 }
9717
9718 /**
9719 * Determines whether the specified pair of {@code char}
9720 * values is a valid
9721 * <a href="http://www.unicode.org/glossary/#surrogate_pair">
9722 * Unicode surrogate pair</a>.
9723 *
9724 * <p>This method is equivalent to the expression:
9725 * <blockquote><pre>{@code
9726 * isHighSurrogate(high) && isLowSurrogate(low)
9727 * }</pre></blockquote>
9728 *
9729 * @param high the high-surrogate code value to be tested
9730 * @param low the low-surrogate code value to be tested
9731 * @return {@code true} if the specified high and
9732 * low-surrogate code values represent a valid surrogate pair;
9733 * {@code false} otherwise.
9734 * @since 1.5
9735 */
9736 public static boolean isSurrogatePair(char high, char low) {
9737 return isHighSurrogate(high) && isLowSurrogate(low);
9738 }
9739
9740 /**
9741 * Determines the number of {@code char} values needed to
9742 * represent the specified character (Unicode code point). If the
9743 * specified character is equal to or greater than 0x10000, then
9744 * the method returns 2. Otherwise, the method returns 1.
9745 *
9746 * <p>This method doesn't validate the specified character to be a
9747 * valid Unicode code point. The caller must validate the
9748 * character value using {@link #isValidCodePoint(int) isValidCodePoint}
9749 * if necessary.
9750 *
9751 * @param codePoint the character (Unicode code point) to be tested.
9752 * @return 2 if the character is equal to or greater than 0x10000; 1 otherwise.
9753 * @see Character#isSupplementaryCodePoint(int)
9754 * @since 1.5
9755 */
9756 public static int charCount(int codePoint) {
9757 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
9758 }
9759
9760 /**
9761 * Converts the specified surrogate pair to its supplementary code
9762 * point value. This method does not validate the specified
9763 * surrogate pair. The caller must validate it using {@link
9764 * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
9765 *
9766 * @param high the high-surrogate code unit
9767 * @param low the low-surrogate code unit
9768 * @return the supplementary code point composed from the
9769 * specified surrogate pair.
9770 * @since 1.5
9771 */
9772 public static int toCodePoint(char high, char low) {
9773 // Optimized form of:
9774 // return ((high - MIN_HIGH_SURROGATE) << 10)
9775 // + (low - MIN_LOW_SURROGATE)
9776 // + MIN_SUPPLEMENTARY_CODE_POINT;
9777 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
9778 - (MIN_HIGH_SURROGATE << 10)
9779 - MIN_LOW_SURROGATE);
9780 }
9781
9782 /**
9783 * Returns the code point at the given index of the
9784 * {@code CharSequence}. If the {@code char} value at
9785 * the given index in the {@code CharSequence} is in the
9786 * high-surrogate range, the following index is less than the
9787 * length of the {@code CharSequence}, and the
9788 * {@code char} value at the following index is in the
9789 * low-surrogate range, then the supplementary code point
9790 * corresponding to this surrogate pair is returned. Otherwise,
9791 * the {@code char} value at the given index is returned.
9792 *
9793 * @param seq a sequence of {@code char} values (Unicode code
9794 * units)
9795 * @param index the index to the {@code char} values (Unicode
9796 * code units) in {@code seq} to be converted
9797 * @return the Unicode code point at the given index
9798 * @throws NullPointerException if {@code seq} is null.
9799 * @throws IndexOutOfBoundsException if the value
9800 * {@code index} is negative or not less than
9801 * {@link CharSequence#length() seq.length()}.
9802 * @since 1.5
9803 */
9804 public static int codePointAt(CharSequence seq, int index) {
9805 char c1 = seq.charAt(index);
9806 if (isHighSurrogate(c1) && ++index < seq.length()) {
9807 char c2 = seq.charAt(index);
9808 if (isLowSurrogate(c2)) {
9809 return toCodePoint(c1, c2);
9810 }
9811 }
9812 return c1;
9813 }
9814
9815 /**
9816 * Returns the code point at the given index of the
9817 * {@code char} array. If the {@code char} value at
9818 * the given index in the {@code char} array is in the
9819 * high-surrogate range, the following index is less than the
9820 * length of the {@code char} array, and the
9821 * {@code char} value at the following index is in the
9822 * low-surrogate range, then the supplementary code point
9823 * corresponding to this surrogate pair is returned. Otherwise,
9824 * the {@code char} value at the given index is returned.
9825 *
9826 * @param a the {@code char} array
9827 * @param index the index to the {@code char} values (Unicode
9828 * code units) in the {@code char} array to be converted
9829 * @return the Unicode code point at the given index
9830 * @throws NullPointerException if {@code a} is null.
9831 * @throws IndexOutOfBoundsException if the value
9832 * {@code index} is negative or not less than
9833 * the length of the {@code char} array.
9834 * @since 1.5
9835 */
9836 public static int codePointAt(char[] a, int index) {
9837 return codePointAtImpl(a, index, a.length);
9838 }
9839
9840 /**
9841 * Returns the code point at the given index of the
9842 * {@code char} array, where only array elements with
9843 * {@code index} less than {@code limit} can be used. If
9844 * the {@code char} value at the given index in the
9845 * {@code char} array is in the high-surrogate range, the
9846 * following index is less than the {@code limit}, and the
9847 * {@code char} value at the following index is in the
9848 * low-surrogate range, then the supplementary code point
9849 * corresponding to this surrogate pair is returned. Otherwise,
9850 * the {@code char} value at the given index is returned.
9851 *
9852 * @param a the {@code char} array
9853 * @param index the index to the {@code char} values (Unicode
9854 * code units) in the {@code char} array to be converted
9855 * @param limit the index after the last array element that
9856 * can be used in the {@code char} array
9857 * @return the Unicode code point at the given index
9858 * @throws NullPointerException if {@code a} is null.
9859 * @throws IndexOutOfBoundsException if the {@code index}
9860 * argument is negative or not less than the {@code limit}
9861 * argument, or if the {@code limit} argument is negative or
9862 * greater than the length of the {@code char} array.
9863 * @since 1.5
9864 */
9865 public static int codePointAt(char[] a, int index, int limit) {
9866 if (index >= limit || index < 0 || limit > a.length) {
9867 throw new IndexOutOfBoundsException();
9868 }
9869 return codePointAtImpl(a, index, limit);
9870 }
9871
9872 // throws ArrayIndexOutOfBoundsException if index out of bounds
9873 static int codePointAtImpl(char[] a, int index, int limit) {
9874 char c1 = a[index];
9875 if (isHighSurrogate(c1) && ++index < limit) {
9876 char c2 = a[index];
9877 if (isLowSurrogate(c2)) {
9878 return toCodePoint(c1, c2);
9879 }
9880 }
9881 return c1;
9882 }
9883
9884 /**
9885 * Returns the code point preceding the given index of the
9886 * {@code CharSequence}. If the {@code char} value at
9887 * {@code (index - 1)} in the {@code CharSequence} is in
9888 * the low-surrogate range, {@code (index - 2)} is not
9889 * negative, and the {@code char} value at {@code (index - 2)}
9890 * in the {@code CharSequence} is in the
9891 * high-surrogate range, then the supplementary code point
9892 * corresponding to this surrogate pair is returned. Otherwise,
9893 * the {@code char} value at {@code (index - 1)} is
9894 * returned.
9895 *
9896 * @param seq the {@code CharSequence} instance
9897 * @param index the index following the code point that should be returned
9898 * @return the Unicode code point value before the given index.
9899 * @throws NullPointerException if {@code seq} is null.
9900 * @throws IndexOutOfBoundsException if the {@code index}
9901 * argument is less than 1 or greater than {@link
9902 * CharSequence#length() seq.length()}.
9903 * @since 1.5
9904 */
9905 public static int codePointBefore(CharSequence seq, int index) {
9906 char c2 = seq.charAt(--index);
9907 if (isLowSurrogate(c2) && index > 0) {
9908 char c1 = seq.charAt(--index);
9909 if (isHighSurrogate(c1)) {
9910 return toCodePoint(c1, c2);
9911 }
9912 }
9913 return c2;
9914 }
9915
9916 /**
9917 * Returns the code point preceding the given index of the
9918 * {@code char} array. If the {@code char} value at
9919 * {@code (index - 1)} in the {@code char} array is in
9920 * the low-surrogate range, {@code (index - 2)} is not
9921 * negative, and the {@code char} value at {@code (index - 2)}
9922 * in the {@code char} array is in the
9923 * high-surrogate range, then the supplementary code point
9924 * corresponding to this surrogate pair is returned. Otherwise,
9925 * the {@code char} value at {@code (index - 1)} is
9926 * returned.
9927 *
9928 * @param a the {@code char} array
9929 * @param index the index following the code point that should be returned
9930 * @return the Unicode code point value before the given index.
9931 * @throws NullPointerException if {@code a} is null.
9932 * @throws IndexOutOfBoundsException if the {@code index}
9933 * argument is less than 1 or greater than the length of the
9934 * {@code char} array
9935 * @since 1.5
9936 */
9937 public static int codePointBefore(char[] a, int index) {
9938 return codePointBeforeImpl(a, index, 0);
9939 }
9940
9941 /**
9942 * Returns the code point preceding the given index of the
9943 * {@code char} array, where only array elements with
9944 * {@code index} greater than or equal to {@code start}
9945 * can be used. If the {@code char} value at {@code (index - 1)}
9946 * in the {@code char} array is in the
9947 * low-surrogate range, {@code (index - 2)} is not less than
9948 * {@code start}, and the {@code char} value at
9949 * {@code (index - 2)} in the {@code char} array is in
9950 * the high-surrogate range, then the supplementary code point
9951 * corresponding to this surrogate pair is returned. Otherwise,
9952 * the {@code char} value at {@code (index - 1)} is
9953 * returned.
9954 *
9955 * @param a the {@code char} array
9956 * @param index the index following the code point that should be returned
9957 * @param start the index of the first array element in the
9958 * {@code char} array
9959 * @return the Unicode code point value before the given index.
9960 * @throws NullPointerException if {@code a} is null.
9961 * @throws IndexOutOfBoundsException if the {@code index}
9962 * argument is not greater than the {@code start} argument or
9963 * is greater than the length of the {@code char} array, or
9964 * if the {@code start} argument is negative or not less than
9965 * the length of the {@code char} array.
9966 * @since 1.5
9967 */
9968 public static int codePointBefore(char[] a, int index, int start) {
9969 if (index <= start || start < 0 || index > a.length) {
9970 throw new IndexOutOfBoundsException();
9971 }
9972 return codePointBeforeImpl(a, index, start);
9973 }
9974
9975 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
9976 static int codePointBeforeImpl(char[] a, int index, int start) {
9977 char c2 = a[--index];
9978 if (isLowSurrogate(c2) && index > start) {
9979 char c1 = a[--index];
9980 if (isHighSurrogate(c1)) {
9981 return toCodePoint(c1, c2);
9982 }
9983 }
9984 return c2;
9985 }
9986
9987 /**
9988 * Returns the leading surrogate (a
9989 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
9990 * high surrogate code unit</a>) of the
9991 * <a href="http://www.unicode.org/glossary/#surrogate_pair">
9992 * surrogate pair</a>
9993 * representing the specified supplementary character (Unicode
9994 * code point) in the UTF-16 encoding. If the specified character
9995 * is not a
9996 * <a href="Character.html#supplementary">supplementary character</a>,
9997 * an unspecified {@code char} is returned.
9998 *
9999 * <p>If
10000 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
10001 * is {@code true}, then
10002 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
10003 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
10004 * are also always {@code true}.
10005 *
10006 * @param codePoint a supplementary character (Unicode code point)
10007 * @return the leading surrogate code unit used to represent the
10008 * character in the UTF-16 encoding
10009 * @since 1.7
10010 */
10011 public static char highSurrogate(int codePoint) {
10012 return (char) ((codePoint >>> 10)
10013 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
10014 }
10015
10016 /**
10017 * Returns the trailing surrogate (a
10018 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
10019 * low surrogate code unit</a>) of the
10020 * <a href="http://www.unicode.org/glossary/#surrogate_pair">
10021 * surrogate pair</a>
10022 * representing the specified supplementary character (Unicode
10023 * code point) in the UTF-16 encoding. If the specified character
10024 * is not a
10025 * <a href="Character.html#supplementary">supplementary character</a>,
10026 * an unspecified {@code char} is returned.
10027 *
10028 * <p>If
10029 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
10030 * is {@code true}, then
10031 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
10032 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
10033 * are also always {@code true}.
10034 *
10035 * @param codePoint a supplementary character (Unicode code point)
10036 * @return the trailing surrogate code unit used to represent the
10037 * character in the UTF-16 encoding
10038 * @since 1.7
10039 */
10040 public static char lowSurrogate(int codePoint) {
10041 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
10042 }
10043
10044 /**
10045 * Converts the specified character (Unicode code point) to its
10046 * UTF-16 representation. If the specified code point is a BMP
10047 * (Basic Multilingual Plane or Plane 0) value, the same value is
10048 * stored in {@code dst[dstIndex]}, and 1 is returned. If the
10049 * specified code point is a supplementary character, its
10050 * surrogate values are stored in {@code dst[dstIndex]}
10051 * (high-surrogate) and {@code dst[dstIndex+1]}
10052 * (low-surrogate), and 2 is returned.
10053 *
10054 * @param codePoint the character (Unicode code point) to be converted.
10055 * @param dst an array of {@code char} in which the
10056 * {@code codePoint}'s UTF-16 value is stored.
10057 * @param dstIndex the start index into the {@code dst}
10058 * array where the converted value is stored.
10059 * @return 1 if the code point is a BMP code point, 2 if the
10060 * code point is a supplementary code point.
10061 * @throws IllegalArgumentException if the specified
10062 * {@code codePoint} is not a valid Unicode code point.
10063 * @throws NullPointerException if the specified {@code dst} is null.
10064 * @throws IndexOutOfBoundsException if {@code dstIndex}
10065 * is negative or not less than {@code dst.length}, or if
10066 * {@code dst} at {@code dstIndex} doesn't have enough
10067 * array element(s) to store the resulting {@code char}
10068 * value(s). (If {@code dstIndex} is equal to
10069 * {@code dst.length-1} and the specified
10070 * {@code codePoint} is a supplementary character, the
10071 * high-surrogate value is not stored in
10072 * {@code dst[dstIndex]}.)
10073 * @since 1.5
10074 */
10075 public static int toChars(int codePoint, char[] dst, int dstIndex) {
10076 if (isBmpCodePoint(codePoint)) {
10077 dst[dstIndex] = (char) codePoint;
10078 return 1;
10079 } else if (isValidCodePoint(codePoint)) {
10080 toSurrogates(codePoint, dst, dstIndex);
10081 return 2;
10082 } else {
10083 throw new IllegalArgumentException(
10084 String.format("Not a valid Unicode code point: 0x%X", codePoint));
10085 }
10086 }
10087
10088 /**
10089 * Converts the specified character (Unicode code point) to its
10090 * UTF-16 representation stored in a {@code char} array. If
10091 * the specified code point is a BMP (Basic Multilingual Plane or
10092 * Plane 0) value, the resulting {@code char} array has
10093 * the same value as {@code codePoint}. If the specified code
10094 * point is a supplementary code point, the resulting
10095 * {@code char} array has the corresponding surrogate pair.
10096 *
10097 * @param codePoint a Unicode code point
10098 * @return a {@code char} array having
10099 * {@code codePoint}'s UTF-16 representation.
10100 * @throws IllegalArgumentException if the specified
10101 * {@code codePoint} is not a valid Unicode code point.
10102 * @since 1.5
10103 */
10104 public static char[] toChars(int codePoint) {
10105 if (isBmpCodePoint(codePoint)) {
10106 return new char[] { (char) codePoint };
10107 } else if (isValidCodePoint(codePoint)) {
10108 char[] result = new char[2];
10109 toSurrogates(codePoint, result, 0);
10110 return result;
10111 } else {
10112 throw new IllegalArgumentException(
10113 String.format("Not a valid Unicode code point: 0x%X", codePoint));
10114 }
10115 }
10116
10117 static void toSurrogates(int codePoint, char[] dst, int index) {
10118 // We write elements "backwards" to guarantee all-or-nothing
10119 dst[index+1] = lowSurrogate(codePoint);
10120 dst[index] = highSurrogate(codePoint);
10121 }
10122
10123 /**
10124 * Returns the number of Unicode code points in the text range of
10125 * the specified char sequence. The text range begins at the
10126 * specified {@code beginIndex} and extends to the
10127 * {@code char} at index {@code endIndex - 1}. Thus the
10128 * length (in {@code char}s) of the text range is
10129 * {@code endIndex-beginIndex}. Unpaired surrogates within
10130 * the text range count as one code point each.
10131 *
10132 * @param seq the char sequence
10133 * @param beginIndex the index to the first {@code char} of
10134 * the text range.
10135 * @param endIndex the index after the last {@code char} of
10136 * the text range.
10137 * @return the number of Unicode code points in the specified text
10138 * range
10139 * @throws NullPointerException if {@code seq} is null.
10140 * @throws IndexOutOfBoundsException if the
10141 * {@code beginIndex} is negative, or {@code endIndex}
10142 * is larger than the length of the given sequence, or
10143 * {@code beginIndex} is larger than {@code endIndex}.
10144 * @since 1.5
10145 */
10146 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
10147 Objects.checkFromToIndex(beginIndex, endIndex, seq.length());
10148 int n = endIndex - beginIndex;
10149 for (int i = beginIndex; i < endIndex; ) {
10150 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
10151 isLowSurrogate(seq.charAt(i))) {
10152 n--;
10153 i++;
10154 }
10155 }
10156 return n;
10157 }
10158
10159 /**
10160 * Returns the number of Unicode code points in a subarray of the
10161 * {@code char} array argument. The {@code offset}
10162 * argument is the index of the first {@code char} of the
10163 * subarray and the {@code count} argument specifies the
10164 * length of the subarray in {@code char}s. Unpaired
10165 * surrogates within the subarray count as one code point each.
10166 *
10167 * @param a the {@code char} array
10168 * @param offset the index of the first {@code char} in the
10169 * given {@code char} array
10170 * @param count the length of the subarray in {@code char}s
10171 * @return the number of Unicode code points in the specified subarray
10172 * @throws NullPointerException if {@code a} is null.
10173 * @throws IndexOutOfBoundsException if {@code offset} or
10174 * {@code count} is negative, or if {@code offset +
10175 * count} is larger than the length of the given array.
10176 * @since 1.5
10177 */
10178 public static int codePointCount(char[] a, int offset, int count) {
10179 Objects.checkFromIndexSize(offset, count, a.length);
10180 return codePointCountImpl(a, offset, count);
10181 }
10182
10183 static int codePointCountImpl(char[] a, int offset, int count) {
10184 int endIndex = offset + count;
10185 int n = count;
10186 for (int i = offset; i < endIndex; ) {
10187 if (isHighSurrogate(a[i++]) && i < endIndex &&
10188 isLowSurrogate(a[i])) {
10189 n--;
10190 i++;
10191 }
10192 }
10193 return n;
10194 }
10195
10196 /**
10197 * Returns the index within the given char sequence that is offset
10198 * from the given {@code index} by {@code codePointOffset}
10199 * code points. Unpaired surrogates within the text range given by
10200 * {@code index} and {@code codePointOffset} count as
10201 * one code point each.
10202 *
10203 * @param seq the char sequence
10204 * @param index the index to be offset
10205 * @param codePointOffset the offset in code points
10206 * @return the index within the char sequence
10207 * @throws NullPointerException if {@code seq} is null.
10208 * @throws IndexOutOfBoundsException if {@code index}
10209 * is negative or larger than the length of the char sequence,
10210 * or if {@code codePointOffset} is positive and the
10211 * subsequence starting with {@code index} has fewer than
10212 * {@code codePointOffset} code points, or if
10213 * {@code codePointOffset} is negative and the subsequence
10214 * before {@code index} has fewer than the absolute value
10215 * of {@code codePointOffset} code points.
10216 * @since 1.5
10217 */
10218 public static int offsetByCodePoints(CharSequence seq, int index,
10219 int codePointOffset) {
10220 int length = seq.length();
10221 if (index < 0 || index > length) {
10222 throw new IndexOutOfBoundsException();
10223 }
10224
10225 int x = index;
10226 if (codePointOffset >= 0) {
10227 int i;
10228 for (i = 0; x < length && i < codePointOffset; i++) {
10229 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
10230 isLowSurrogate(seq.charAt(x))) {
10231 x++;
10232 }
10233 }
10234 if (i < codePointOffset) {
10235 throw new IndexOutOfBoundsException();
10236 }
10237 } else {
10238 int i;
10239 for (i = codePointOffset; x > 0 && i < 0; i++) {
10240 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
10241 isHighSurrogate(seq.charAt(x-1))) {
10242 x--;
10243 }
10244 }
10245 if (i < 0) {
10246 throw new IndexOutOfBoundsException();
10247 }
10248 }
10249 return x;
10250 }
10251
10252 /**
10253 * Returns the index within the given {@code char} subarray
10254 * that is offset from the given {@code index} by
10255 * {@code codePointOffset} code points. The
10256 * {@code start} and {@code count} arguments specify a
10257 * subarray of the {@code char} array. Unpaired surrogates
10258 * within the text range given by {@code index} and
10259 * {@code codePointOffset} count as one code point each.
10260 *
10261 * @param a the {@code char} array
10262 * @param start the index of the first {@code char} of the
10263 * subarray
10264 * @param count the length of the subarray in {@code char}s
10265 * @param index the index to be offset
10266 * @param codePointOffset the offset in code points
10267 * @return the index within the subarray
10268 * @throws NullPointerException if {@code a} is null.
10269 * @throws IndexOutOfBoundsException
10270 * if {@code start} or {@code count} is negative,
10271 * or if {@code start + count} is larger than the length of
10272 * the given array,
10273 * or if {@code index} is less than {@code start} or
10274 * larger than {@code start + count},
10275 * or if {@code codePointOffset} is positive and the text range
10276 * starting with {@code index} and ending with {@code start + count - 1}
10277 * has fewer than {@code codePointOffset} code
10278 * points,
10279 * or if {@code codePointOffset} is negative and the text range
10280 * starting with {@code start} and ending with {@code index - 1}
10281 * has fewer than the absolute value of
10282 * {@code codePointOffset} code points.
10283 * @since 1.5
10284 */
10285 public static int offsetByCodePoints(char[] a, int start, int count,
10286 int index, int codePointOffset) {
10287 if (count > a.length-start || start < 0 || count < 0
10288 || index < start || index > start+count) {
10289 throw new IndexOutOfBoundsException();
10290 }
10291 return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
10292 }
10293
10294 static int offsetByCodePointsImpl(char[]a, int start, int count,
10295 int index, int codePointOffset) {
10296 int x = index;
10297 if (codePointOffset >= 0) {
10298 int limit = start + count;
10299 int i;
10300 for (i = 0; x < limit && i < codePointOffset; i++) {
10301 if (isHighSurrogate(a[x++]) && x < limit &&
10302 isLowSurrogate(a[x])) {
10303 x++;
10304 }
10305 }
10306 if (i < codePointOffset) {
10307 throw new IndexOutOfBoundsException();
10308 }
10309 } else {
10310 int i;
10311 for (i = codePointOffset; x > start && i < 0; i++) {
10312 if (isLowSurrogate(a[--x]) && x > start &&
10313 isHighSurrogate(a[x-1])) {
10314 x--;
10315 }
10316 }
10317 if (i < 0) {
10318 throw new IndexOutOfBoundsException();
10319 }
10320 }
10321 return x;
10322 }
10323
10324 /**
10325 * Determines if the specified character is a lowercase character.
10326 * <p>
10327 * A character is lowercase if its general category type, provided
10328 * by {@code Character.getType(ch)}, is
10329 * {@code LOWERCASE_LETTER}, or it has contributory property
10330 * Other_Lowercase as defined by the Unicode Standard.
10331 * <p>
10332 * The following are examples of lowercase characters:
10333 * <blockquote><pre>
10334 * a b c d e f g h i j k l m n o p q r s t u v w x y z
10335 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
10336 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
10337 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
10338 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
10339 * </pre></blockquote>
10340 * <p> Many other Unicode characters are lowercase too.
10341 *
10342 * <p><b>Note:</b> This method cannot handle <a
10343 * href="#supplementary"> supplementary characters</a>. To support
10344 * all Unicode characters, including supplementary characters, use
10345 * the {@link #isLowerCase(int)} method.
10346 *
10347 * @param ch the character to be tested.
10348 * @return {@code true} if the character is lowercase;
10349 * {@code false} otherwise.
10350 * @see Character#isUpperCase(char)
10351 * @see Character#isTitleCase(char)
10352 * @see Character#toLowerCase(char)
10353 * @see Character#getType(char)
10354 */
10355 public static boolean isLowerCase(char ch) {
10356 return isLowerCase((int)ch);
10357 }
10358
10359 /**
10360 * Determines if the specified character (Unicode code point) is a
10361 * lowercase character.
10362 * <p>
10363 * A character is lowercase if its general category type, provided
10364 * by {@link Character#getType getType(codePoint)}, is
10365 * {@code LOWERCASE_LETTER}, or it has contributory property
10366 * Other_Lowercase as defined by the Unicode Standard.
10367 * <p>
10368 * The following are examples of lowercase characters:
10369 * <blockquote><pre>
10370 * a b c d e f g h i j k l m n o p q r s t u v w x y z
10371 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
10372 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
10373 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
10374 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
10375 * </pre></blockquote>
10376 * <p> Many other Unicode characters are lowercase too.
10377 *
10378 * @param codePoint the character (Unicode code point) to be tested.
10379 * @return {@code true} if the character is lowercase;
10380 * {@code false} otherwise.
10381 * @see Character#isUpperCase(int)
10382 * @see Character#isTitleCase(int)
10383 * @see Character#toLowerCase(int)
10384 * @see Character#getType(int)
10385 * @since 1.5
10386 */
10387 public static boolean isLowerCase(int codePoint) {
10388 return CharacterData.of(codePoint).isLowerCase(codePoint);
10389 }
10390
10391 /**
10392 * Determines if the specified character is an uppercase character.
10393 * <p>
10394 * A character is uppercase if its general category type, provided by
10395 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
10396 * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
10397 * <p>
10398 * The following are examples of uppercase characters:
10399 * <blockquote><pre>
10400 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
10401 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
10402 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
10403 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
10404 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
10405 * </pre></blockquote>
10406 * <p> Many other Unicode characters are uppercase too.
10407 *
10408 * <p><b>Note:</b> This method cannot handle <a
10409 * href="#supplementary"> supplementary characters</a>. To support
10410 * all Unicode characters, including supplementary characters, use
10411 * the {@link #isUpperCase(int)} method.
10412 *
10413 * @param ch the character to be tested.
10414 * @return {@code true} if the character is uppercase;
10415 * {@code false} otherwise.
10416 * @see Character#isLowerCase(char)
10417 * @see Character#isTitleCase(char)
10418 * @see Character#toUpperCase(char)
10419 * @see Character#getType(char)
10420 * @since 1.0
10421 */
10422 public static boolean isUpperCase(char ch) {
10423 return isUpperCase((int)ch);
10424 }
10425
10426 /**
10427 * Determines if the specified character (Unicode code point) is an uppercase character.
10428 * <p>
10429 * A character is uppercase if its general category type, provided by
10430 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
10431 * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
10432 * <p>
10433 * The following are examples of uppercase characters:
10434 * <blockquote><pre>
10435 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
10436 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
10437 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
10438 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
10439 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
10440 * </pre></blockquote>
10441 * <p> Many other Unicode characters are uppercase too.
10442 *
10443 * @param codePoint the character (Unicode code point) to be tested.
10444 * @return {@code true} if the character is uppercase;
10445 * {@code false} otherwise.
10446 * @see Character#isLowerCase(int)
10447 * @see Character#isTitleCase(int)
10448 * @see Character#toUpperCase(int)
10449 * @see Character#getType(int)
10450 * @since 1.5
10451 */
10452 public static boolean isUpperCase(int codePoint) {
10453 return CharacterData.of(codePoint).isUpperCase(codePoint);
10454 }
10455
10456 /**
10457 * Determines if the specified character is a titlecase character.
10458 * <p>
10459 * A character is a titlecase character if its general
10460 * category type, provided by {@code Character.getType(ch)},
10461 * is {@code TITLECASE_LETTER}.
10462 * <p>
10463 * Some characters look like pairs of Latin letters. For example, there
10464 * is an uppercase letter that looks like "LJ" and has a corresponding
10465 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
10466 * is the appropriate form to use when rendering a word in lowercase
10467 * with initial capitals, as for a book title.
10468 * <p>
10469 * These are some of the Unicode characters for which this method returns
10470 * {@code true}:
10471 * <ul>
10472 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
10473 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
10474 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
10475 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
10476 * </ul>
10477 * <p> Many other Unicode characters are titlecase too.
10478 *
10479 * <p><b>Note:</b> This method cannot handle <a
10480 * href="#supplementary"> supplementary characters</a>. To support
10481 * all Unicode characters, including supplementary characters, use
10482 * the {@link #isTitleCase(int)} method.
10483 *
10484 * @param ch the character to be tested.
10485 * @return {@code true} if the character is titlecase;
10486 * {@code false} otherwise.
10487 * @see Character#isLowerCase(char)
10488 * @see Character#isUpperCase(char)
10489 * @see Character#toTitleCase(char)
10490 * @see Character#getType(char)
10491 * @since 1.0.2
10492 */
10493 public static boolean isTitleCase(char ch) {
10494 return isTitleCase((int)ch);
10495 }
10496
10497 /**
10498 * Determines if the specified character (Unicode code point) is a titlecase character.
10499 * <p>
10500 * A character is a titlecase character if its general
10501 * category type, provided by {@link Character#getType(int) getType(codePoint)},
10502 * is {@code TITLECASE_LETTER}.
10503 * <p>
10504 * Some characters look like pairs of Latin letters. For example, there
10505 * is an uppercase letter that looks like "LJ" and has a corresponding
10506 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
10507 * is the appropriate form to use when rendering a word in lowercase
10508 * with initial capitals, as for a book title.
10509 * <p>
10510 * These are some of the Unicode characters for which this method returns
10511 * {@code true}:
10512 * <ul>
10513 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
10514 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
10515 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
10516 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
10517 * </ul>
10518 * <p> Many other Unicode characters are titlecase too.
10519 *
10520 * @param codePoint the character (Unicode code point) to be tested.
10521 * @return {@code true} if the character is titlecase;
10522 * {@code false} otherwise.
10523 * @see Character#isLowerCase(int)
10524 * @see Character#isUpperCase(int)
10525 * @see Character#toTitleCase(int)
10526 * @see Character#getType(int)
10527 * @since 1.5
10528 */
10529 public static boolean isTitleCase(int codePoint) {
10530 return getType(codePoint) == Character.TITLECASE_LETTER;
10531 }
10532
10533 /**
10534 * Determines if the specified character is a digit.
10535 * <p>
10536 * A character is a digit if its general category type, provided
10537 * by {@code Character.getType(ch)}, is
10538 * {@code DECIMAL_DIGIT_NUMBER}.
10539 * <p>
10540 * Some Unicode character ranges that contain digits:
10541 * <ul>
10542 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
10543 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
10544 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
10545 * Arabic-Indic digits
10546 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
10547 * Extended Arabic-Indic digits
10548 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
10549 * Devanagari digits
10550 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
10551 * Fullwidth digits
10552 * </ul>
10553 *
10554 * Many other character ranges contain digits as well.
10555 *
10556 * <p><b>Note:</b> This method cannot handle <a
10557 * href="#supplementary"> supplementary characters</a>. To support
10558 * all Unicode characters, including supplementary characters, use
10559 * the {@link #isDigit(int)} method.
10560 *
10561 * @param ch the character to be tested.
10562 * @return {@code true} if the character is a digit;
10563 * {@code false} otherwise.
10564 * @see Character#digit(char, int)
10565 * @see Character#forDigit(int, int)
10566 * @see Character#getType(char)
10567 */
10568 public static boolean isDigit(char ch) {
10569 return isDigit((int)ch);
10570 }
10571
10572 /**
10573 * Determines if the specified character (Unicode code point) is a digit.
10574 * <p>
10575 * A character is a digit if its general category type, provided
10576 * by {@link Character#getType(int) getType(codePoint)}, is
10577 * {@code DECIMAL_DIGIT_NUMBER}.
10578 * <p>
10579 * Some Unicode character ranges that contain digits:
10580 * <ul>
10581 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
10582 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
10583 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
10584 * Arabic-Indic digits
10585 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
10586 * Extended Arabic-Indic digits
10587 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
10588 * Devanagari digits
10589 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
10590 * Fullwidth digits
10591 * </ul>
10592 *
10593 * Many other character ranges contain digits as well.
10594 *
10595 * @param codePoint the character (Unicode code point) to be tested.
10596 * @return {@code true} if the character is a digit;
10597 * {@code false} otherwise.
10598 * @see Character#forDigit(int, int)
10599 * @see Character#getType(int)
10600 * @since 1.5
10601 */
10602 public static boolean isDigit(int codePoint) {
10603 return CharacterData.of(codePoint).isDigit(codePoint);
10604 }
10605
10606 /**
10607 * Determines if a character is defined in Unicode.
10608 * <p>
10609 * A character is defined if at least one of the following is true:
10610 * <ul>
10611 * <li>It has an entry in the UnicodeData file.
10612 * <li>It has a value in a range defined by the UnicodeData file.
10613 * </ul>
10614 *
10615 * <p><b>Note:</b> This method cannot handle <a
10616 * href="#supplementary"> supplementary characters</a>. To support
10617 * all Unicode characters, including supplementary characters, use
10618 * the {@link #isDefined(int)} method.
10619 *
10620 * @param ch the character to be tested
10621 * @return {@code true} if the character has a defined meaning
10622 * in Unicode; {@code false} otherwise.
10623 * @see Character#isDigit(char)
10624 * @see Character#isLetter(char)
10625 * @see Character#isLetterOrDigit(char)
10626 * @see Character#isLowerCase(char)
10627 * @see Character#isTitleCase(char)
10628 * @see Character#isUpperCase(char)
10629 * @since 1.0.2
10630 */
10631 public static boolean isDefined(char ch) {
10632 return isDefined((int)ch);
10633 }
10634
10635 /**
10636 * Determines if a character (Unicode code point) is defined in Unicode.
10637 * <p>
10638 * A character is defined if at least one of the following is true:
10639 * <ul>
10640 * <li>It has an entry in the UnicodeData file.
10641 * <li>It has a value in a range defined by the UnicodeData file.
10642 * </ul>
10643 *
10644 * @param codePoint the character (Unicode code point) to be tested.
10645 * @return {@code true} if the character has a defined meaning
10646 * in Unicode; {@code false} otherwise.
10647 * @see Character#isDigit(int)
10648 * @see Character#isLetter(int)
10649 * @see Character#isLetterOrDigit(int)
10650 * @see Character#isLowerCase(int)
10651 * @see Character#isTitleCase(int)
10652 * @see Character#isUpperCase(int)
10653 * @since 1.5
10654 */
10655 public static boolean isDefined(int codePoint) {
10656 return getType(codePoint) != Character.UNASSIGNED;
10657 }
10658
10659 /**
10660 * Determines if the specified character is a letter.
10661 * <p>
10662 * A character is considered to be a letter if its general
10663 * category type, provided by {@code Character.getType(ch)},
10664 * is any of the following:
10665 * <ul>
10666 * <li> {@code UPPERCASE_LETTER}
10667 * <li> {@code LOWERCASE_LETTER}
10668 * <li> {@code TITLECASE_LETTER}
10669 * <li> {@code MODIFIER_LETTER}
10670 * <li> {@code OTHER_LETTER}
10671 * </ul>
10672 *
10673 * Not all letters have case. Many characters are
10674 * letters but are neither uppercase nor lowercase nor titlecase.
10675 *
10676 * <p><b>Note:</b> This method cannot handle <a
10677 * href="#supplementary"> supplementary characters</a>. To support
10678 * all Unicode characters, including supplementary characters, use
10679 * the {@link #isLetter(int)} method.
10680 *
10681 * @param ch the character to be tested.
10682 * @return {@code true} if the character is a letter;
10683 * {@code false} otherwise.
10684 * @see Character#isDigit(char)
10685 * @see Character#isJavaIdentifierStart(char)
10686 * @see Character#isJavaLetter(char)
10687 * @see Character#isJavaLetterOrDigit(char)
10688 * @see Character#isLetterOrDigit(char)
10689 * @see Character#isLowerCase(char)
10690 * @see Character#isTitleCase(char)
10691 * @see Character#isUnicodeIdentifierStart(char)
10692 * @see Character#isUpperCase(char)
10693 */
10694 public static boolean isLetter(char ch) {
10695 return isLetter((int)ch);
10696 }
10697
10698 /**
10699 * Determines if the specified character (Unicode code point) is a letter.
10700 * <p>
10701 * A character is considered to be a letter if its general
10702 * category type, provided by {@link Character#getType(int) getType(codePoint)},
10703 * is any of the following:
10704 * <ul>
10705 * <li> {@code UPPERCASE_LETTER}
10706 * <li> {@code LOWERCASE_LETTER}
10707 * <li> {@code TITLECASE_LETTER}
10708 * <li> {@code MODIFIER_LETTER}
10709 * <li> {@code OTHER_LETTER}
10710 * </ul>
10711 *
10712 * Not all letters have case. Many characters are
10713 * letters but are neither uppercase nor lowercase nor titlecase.
10714 *
10715 * @param codePoint the character (Unicode code point) to be tested.
10716 * @return {@code true} if the character is a letter;
10717 * {@code false} otherwise.
10718 * @see Character#isDigit(int)
10719 * @see Character#isJavaIdentifierStart(int)
10720 * @see Character#isLetterOrDigit(int)
10721 * @see Character#isLowerCase(int)
10722 * @see Character#isTitleCase(int)
10723 * @see Character#isUnicodeIdentifierStart(int)
10724 * @see Character#isUpperCase(int)
10725 * @since 1.5
10726 */
10727 public static boolean isLetter(int codePoint) {
10728 return ((((1 << Character.UPPERCASE_LETTER) |
10729 (1 << Character.LOWERCASE_LETTER) |
10730 (1 << Character.TITLECASE_LETTER) |
10731 (1 << Character.MODIFIER_LETTER) |
10732 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
10733 != 0;
10734 }
10735
10736 /**
10737 * Determines if the specified character is a letter or digit.
10738 * <p>
10739 * A character is considered to be a letter or digit if either
10740 * {@code Character.isLetter(char ch)} or
10741 * {@code Character.isDigit(char ch)} returns
10742 * {@code true} for the character.
10743 *
10744 * <p><b>Note:</b> This method cannot handle <a
10745 * href="#supplementary"> supplementary characters</a>. To support
10746 * all Unicode characters, including supplementary characters, use
10747 * the {@link #isLetterOrDigit(int)} method.
10748 *
10749 * @param ch the character to be tested.
10750 * @return {@code true} if the character is a letter or digit;
10751 * {@code false} otherwise.
10752 * @see Character#isDigit(char)
10753 * @see Character#isJavaIdentifierPart(char)
10754 * @see Character#isJavaLetter(char)
10755 * @see Character#isJavaLetterOrDigit(char)
10756 * @see Character#isLetter(char)
10757 * @see Character#isUnicodeIdentifierPart(char)
10758 * @since 1.0.2
10759 */
10760 public static boolean isLetterOrDigit(char ch) {
10761 return isLetterOrDigit((int)ch);
10762 }
10763
10764 /**
10765 * Determines if the specified character (Unicode code point) is a letter or digit.
10766 * <p>
10767 * A character is considered to be a letter or digit if either
10768 * {@link #isLetter(int) isLetter(codePoint)} or
10769 * {@link #isDigit(int) isDigit(codePoint)} returns
10770 * {@code true} for the character.
10771 *
10772 * @param codePoint the character (Unicode code point) to be tested.
10773 * @return {@code true} if the character is a letter or digit;
10774 * {@code false} otherwise.
10775 * @see Character#isDigit(int)
10776 * @see Character#isJavaIdentifierPart(int)
10777 * @see Character#isLetter(int)
10778 * @see Character#isUnicodeIdentifierPart(int)
10779 * @since 1.5
10780 */
10781 public static boolean isLetterOrDigit(int codePoint) {
10782 return ((((1 << Character.UPPERCASE_LETTER) |
10783 (1 << Character.LOWERCASE_LETTER) |
10784 (1 << Character.TITLECASE_LETTER) |
10785 (1 << Character.MODIFIER_LETTER) |
10786 (1 << Character.OTHER_LETTER) |
10787 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
10788 != 0;
10789 }
10790
10791 /**
10792 * Determines if the specified character is permissible as the first
10793 * character in a Java identifier.
10794 * <p>
10795 * A character may start a Java identifier if and only if
10796 * one of the following conditions is true:
10797 * <ul>
10798 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10799 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
10800 * <li> {@code ch} is a currency symbol (such as {@code '$'})
10801 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
10802 * </ul>
10803 *
10804 * @param ch the character to be tested.
10805 * @return {@code true} if the character may start a Java
10806 * identifier; {@code false} otherwise.
10807 * @see Character#isJavaLetterOrDigit(char)
10808 * @see Character#isJavaIdentifierStart(char)
10809 * @see Character#isJavaIdentifierPart(char)
10810 * @see Character#isLetter(char)
10811 * @see Character#isLetterOrDigit(char)
10812 * @see Character#isUnicodeIdentifierStart(char)
10813 * @since 1.0.2
10814 * @deprecated Replaced by isJavaIdentifierStart(char).
10815 */
10816 @Deprecated(since="1.1")
10817 public static boolean isJavaLetter(char ch) {
10818 return isJavaIdentifierStart(ch);
10819 }
10820
10821 /**
10822 * Determines if the specified character may be part of a Java
10823 * identifier as other than the first character.
10824 * <p>
10825 * A character may be part of a Java identifier if and only if one
10826 * of the following conditions is true:
10827 * <ul>
10828 * <li> it is a letter
10829 * <li> it is a currency symbol (such as {@code '$'})
10830 * <li> it is a connecting punctuation character (such as {@code '_'})
10831 * <li> it is a digit
10832 * <li> it is a numeric letter (such as a Roman numeral character)
10833 * <li> it is a combining mark
10834 * <li> it is a non-spacing mark
10835 * <li> {@code isIdentifierIgnorable} returns
10836 * {@code true} for the character.
10837 * </ul>
10838 *
10839 * @param ch the character to be tested.
10840 * @return {@code true} if the character may be part of a
10841 * Java identifier; {@code false} otherwise.
10842 * @see Character#isJavaLetter(char)
10843 * @see Character#isJavaIdentifierStart(char)
10844 * @see Character#isJavaIdentifierPart(char)
10845 * @see Character#isLetter(char)
10846 * @see Character#isLetterOrDigit(char)
10847 * @see Character#isUnicodeIdentifierPart(char)
10848 * @see Character#isIdentifierIgnorable(char)
10849 * @since 1.0.2
10850 * @deprecated Replaced by isJavaIdentifierPart(char).
10851 */
10852 @Deprecated(since="1.1")
10853 public static boolean isJavaLetterOrDigit(char ch) {
10854 return isJavaIdentifierPart(ch);
10855 }
10856
10857 /**
10858 * Determines if the specified character (Unicode code point) is alphabetic.
10859 * <p>
10860 * A character is considered to be alphabetic if its general category type,
10861 * provided by {@link Character#getType(int) getType(codePoint)}, is any of
10862 * the following:
10863 * <ul>
10864 * <li> {@code UPPERCASE_LETTER}
10865 * <li> {@code LOWERCASE_LETTER}
10866 * <li> {@code TITLECASE_LETTER}
10867 * <li> {@code MODIFIER_LETTER}
10868 * <li> {@code OTHER_LETTER}
10869 * <li> {@code LETTER_NUMBER}
10870 * </ul>
10871 * or it has contributory property Other_Alphabetic as defined by the
10872 * Unicode Standard.
10873 *
10874 * @param codePoint the character (Unicode code point) to be tested.
10875 * @return {@code true} if the character is a Unicode alphabet
10876 * character, {@code false} otherwise.
10877 * @since 1.7
10878 */
10879 public static boolean isAlphabetic(int codePoint) {
10880 return (((((1 << Character.UPPERCASE_LETTER) |
10881 (1 << Character.LOWERCASE_LETTER) |
10882 (1 << Character.TITLECASE_LETTER) |
10883 (1 << Character.MODIFIER_LETTER) |
10884 (1 << Character.OTHER_LETTER) |
10885 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
10886 CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
10887 }
10888
10889 /**
10890 * Determines if the specified character (Unicode code point) is a CJKV
10891 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
10892 * the Unicode Standard.
10893 *
10894 * @param codePoint the character (Unicode code point) to be tested.
10895 * @return {@code true} if the character is a Unicode ideograph
10896 * character, {@code false} otherwise.
10897 * @since 1.7
10898 */
10899 public static boolean isIdeographic(int codePoint) {
10900 return CharacterData.of(codePoint).isIdeographic(codePoint);
10901 }
10902
10903 /**
10904 * Determines if the specified character is
10905 * permissible as the first character in a Java identifier.
10906 * <p>
10907 * A character may start a Java identifier if and only if
10908 * one of the following conditions is true:
10909 * <ul>
10910 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10911 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
10912 * <li> {@code ch} is a currency symbol (such as {@code '$'})
10913 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
10914 * </ul>
10915 *
10916 * <p><b>Note:</b> This method cannot handle <a
10917 * href="#supplementary"> supplementary characters</a>. To support
10918 * all Unicode characters, including supplementary characters, use
10919 * the {@link #isJavaIdentifierStart(int)} method.
10920 *
10921 * @param ch the character to be tested.
10922 * @return {@code true} if the character may start a Java identifier;
10923 * {@code false} otherwise.
10924 * @see Character#isJavaIdentifierPart(char)
10925 * @see Character#isLetter(char)
10926 * @see Character#isUnicodeIdentifierStart(char)
10927 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10928 * @since 1.1
10929 */
10930 @SuppressWarnings("doclint:reference") // cross-module links
10931 public static boolean isJavaIdentifierStart(char ch) {
10932 return isJavaIdentifierStart((int)ch);
10933 }
10934
10935 /**
10936 * Determines if the character (Unicode code point) is
10937 * permissible as the first character in a Java identifier.
10938 * <p>
10939 * A character may start a Java identifier if and only if
10940 * one of the following conditions is true:
10941 * <ul>
10942 * <li> {@link #isLetter(int) isLetter(codePoint)}
10943 * returns {@code true}
10944 * <li> {@link #getType(int) getType(codePoint)}
10945 * returns {@code LETTER_NUMBER}
10946 * <li> the referenced character is a currency symbol (such as {@code '$'})
10947 * <li> the referenced character is a connecting punctuation character
10948 * (such as {@code '_'}).
10949 * </ul>
10950 *
10951 * @param codePoint the character (Unicode code point) to be tested.
10952 * @return {@code true} if the character may start a Java identifier;
10953 * {@code false} otherwise.
10954 * @see Character#isJavaIdentifierPart(int)
10955 * @see Character#isLetter(int)
10956 * @see Character#isUnicodeIdentifierStart(int)
10957 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10958 * @since 1.5
10959 */
10960 @SuppressWarnings("doclint:reference") // cross-module links
10961 public static boolean isJavaIdentifierStart(int codePoint) {
10962 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
10963 }
10964
10965 /**
10966 * Determines if the specified character may be part of a Java
10967 * identifier as other than the first character.
10968 * <p>
10969 * A character may be part of a Java identifier if any of the following
10970 * conditions are true:
10971 * <ul>
10972 * <li> it is a letter
10973 * <li> it is a currency symbol (such as {@code '$'})
10974 * <li> it is a connecting punctuation character (such as {@code '_'})
10975 * <li> it is a digit
10976 * <li> it is a numeric letter (such as a Roman numeral character)
10977 * <li> it is a combining mark
10978 * <li> it is a non-spacing mark
10979 * <li> {@code isIdentifierIgnorable} returns
10980 * {@code true} for the character
10981 * </ul>
10982 *
10983 * <p><b>Note:</b> This method cannot handle <a
10984 * href="#supplementary"> supplementary characters</a>. To support
10985 * all Unicode characters, including supplementary characters, use
10986 * the {@link #isJavaIdentifierPart(int)} method.
10987 *
10988 * @param ch the character to be tested.
10989 * @return {@code true} if the character may be part of a
10990 * Java identifier; {@code false} otherwise.
10991 * @see Character#isIdentifierIgnorable(char)
10992 * @see Character#isJavaIdentifierStart(char)
10993 * @see Character#isLetterOrDigit(char)
10994 * @see Character#isUnicodeIdentifierPart(char)
10995 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10996 * @since 1.1
10997 */
10998 @SuppressWarnings("doclint:reference") // cross-module links
10999 public static boolean isJavaIdentifierPart(char ch) {
11000 return isJavaIdentifierPart((int)ch);
11001 }
11002
11003 /**
11004 * Determines if the character (Unicode code point) may be part of a Java
11005 * identifier as other than the first character.
11006 * <p>
11007 * A character may be part of a Java identifier if any of the following
11008 * conditions are true:
11009 * <ul>
11010 * <li> it is a letter
11011 * <li> it is a currency symbol (such as {@code '$'})
11012 * <li> it is a connecting punctuation character (such as {@code '_'})
11013 * <li> it is a digit
11014 * <li> it is a numeric letter (such as a Roman numeral character)
11015 * <li> it is a combining mark
11016 * <li> it is a non-spacing mark
11017 * <li> {@link #isIdentifierIgnorable(int)
11018 * isIdentifierIgnorable(codePoint)} returns {@code true} for
11019 * the code point
11020 * </ul>
11021 *
11022 * @param codePoint the character (Unicode code point) to be tested.
11023 * @return {@code true} if the character may be part of a
11024 * Java identifier; {@code false} otherwise.
11025 * @see Character#isIdentifierIgnorable(int)
11026 * @see Character#isJavaIdentifierStart(int)
11027 * @see Character#isLetterOrDigit(int)
11028 * @see Character#isUnicodeIdentifierPart(int)
11029 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence)
11030 * @since 1.5
11031 */
11032 @SuppressWarnings("doclint:reference") // cross-module links
11033 public static boolean isJavaIdentifierPart(int codePoint) {
11034 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
11035 }
11036
11037 /**
11038 * Determines if the specified character is permissible as the
11039 * first character in a Unicode identifier.
11040 * <p>
11041 * A character may start a Unicode identifier if and only if
11042 * one of the following conditions is true:
11043 * <ul>
11044 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
11045 * <li> {@link #getType(char) getType(ch)} returns
11046 * {@code LETTER_NUMBER}.
11047 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
11048 * {@code Other_ID_Start}</a> character.
11049 * </ul>
11050 * <p>
11051 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
11052 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
11053 * with the following profile of UAX31:
11054 * <pre>
11055 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
11056 * </pre>
11057 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
11058 * compatibility.
11059 *
11060 * <p><b>Note:</b> This method cannot handle <a
11061 * href="#supplementary"> supplementary characters</a>. To support
11062 * all Unicode characters, including supplementary characters, use
11063 * the {@link #isUnicodeIdentifierStart(int)} method.
11064 *
11065 * @param ch the character to be tested.
11066 * @return {@code true} if the character may start a Unicode
11067 * identifier; {@code false} otherwise.
11068 *
11069 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
11070 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
11071 * @see Character#isJavaIdentifierStart(char)
11072 * @see Character#isLetter(char)
11073 * @see Character#isUnicodeIdentifierPart(char)
11074 * @since 1.1
11075 */
11076 public static boolean isUnicodeIdentifierStart(char ch) {
11077 return isUnicodeIdentifierStart((int)ch);
11078 }
11079
11080 /**
11081 * Determines if the specified character (Unicode code point) is permissible as the
11082 * first character in a Unicode identifier.
11083 * <p>
11084 * A character may start a Unicode identifier if and only if
11085 * one of the following conditions is true:
11086 * <ul>
11087 * <li> {@link #isLetter(int) isLetter(codePoint)}
11088 * returns {@code true}
11089 * <li> {@link #getType(int) getType(codePoint)}
11090 * returns {@code LETTER_NUMBER}.
11091 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
11092 * {@code Other_ID_Start}</a> character.
11093 * </ul>
11094 * <p>
11095 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
11096 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
11097 * with the following profile of UAX31:
11098 * <pre>
11099 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
11100 * </pre>
11101 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
11102 * compatibility.
11103 *
11104 * @param codePoint the character (Unicode code point) to be tested.
11105 * @return {@code true} if the character may start a Unicode
11106 * identifier; {@code false} otherwise.
11107 *
11108 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
11109 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
11110 * @see Character#isJavaIdentifierStart(int)
11111 * @see Character#isLetter(int)
11112 * @see Character#isUnicodeIdentifierPart(int)
11113 * @since 1.5
11114 */
11115 public static boolean isUnicodeIdentifierStart(int codePoint) {
11116 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
11117 }
11118
11119 /**
11120 * Determines if the specified character may be part of a Unicode
11121 * identifier as other than the first character.
11122 * <p>
11123 * A character may be part of a Unicode identifier if and only if
11124 * one of the following statements is true:
11125 * <ul>
11126 * <li> it is a letter
11127 * <li> it is a connecting punctuation character (such as {@code '_'})
11128 * <li> it is a digit
11129 * <li> it is a numeric letter (such as a Roman numeral character)
11130 * <li> it is a combining mark
11131 * <li> it is a non-spacing mark
11132 * <li> {@code isIdentifierIgnorable} returns
11133 * {@code true} for this character.
11134 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
11135 * {@code Other_ID_Start}</a> character.
11136 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
11137 * {@code Other_ID_Continue}</a> character.
11138 * </ul>
11139 * <p>
11140 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
11141 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
11142 * with the following profile of UAX31:
11143 * <pre>
11144 * Continue := Start + ID_Continue + ignorable
11145 * Medial := empty
11146 * ignorable := isIdentifierIgnorable(char) returns true for the character
11147 * </pre>
11148 * {@code ignorable} is added to {@code Continue} for backward
11149 * compatibility.
11150 *
11151 * <p><b>Note:</b> This method cannot handle <a
11152 * href="#supplementary"> supplementary characters</a>. To support
11153 * all Unicode characters, including supplementary characters, use
11154 * the {@link #isUnicodeIdentifierPart(int)} method.
11155 *
11156 * @param ch the character to be tested.
11157 * @return {@code true} if the character may be part of a
11158 * Unicode identifier; {@code false} otherwise.
11159 *
11160 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
11161 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
11162 * @see Character#isIdentifierIgnorable(char)
11163 * @see Character#isJavaIdentifierPart(char)
11164 * @see Character#isLetterOrDigit(char)
11165 * @see Character#isUnicodeIdentifierStart(char)
11166 * @since 1.1
11167 */
11168 public static boolean isUnicodeIdentifierPart(char ch) {
11169 return isUnicodeIdentifierPart((int)ch);
11170 }
11171
11172 /**
11173 * Determines if the specified character (Unicode code point) may be part of a Unicode
11174 * identifier as other than the first character.
11175 * <p>
11176 * A character may be part of a Unicode identifier if and only if
11177 * one of the following statements is true:
11178 * <ul>
11179 * <li> it is a letter
11180 * <li> it is a connecting punctuation character (such as {@code '_'})
11181 * <li> it is a digit
11182 * <li> it is a numeric letter (such as a Roman numeral character)
11183 * <li> it is a combining mark
11184 * <li> it is a non-spacing mark
11185 * <li> {@code isIdentifierIgnorable} returns
11186 * {@code true} for this character.
11187 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
11188 * {@code Other_ID_Start}</a> character.
11189 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
11190 * {@code Other_ID_Continue}</a> character.
11191 * </ul>
11192 * <p>
11193 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
11194 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
11195 * with the following profile of UAX31:
11196 * <pre>
11197 * Continue := Start + ID_Continue + ignorable
11198 * Medial := empty
11199 * ignorable := isIdentifierIgnorable(int) returns true for the character
11200 * </pre>
11201 * {@code ignorable} is added to {@code Continue} for backward
11202 * compatibility.
11203 *
11204 * @param codePoint the character (Unicode code point) to be tested.
11205 * @return {@code true} if the character may be part of a
11206 * Unicode identifier; {@code false} otherwise.
11207 *
11208 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
11209 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
11210 * @see Character#isIdentifierIgnorable(int)
11211 * @see Character#isJavaIdentifierPart(int)
11212 * @see Character#isLetterOrDigit(int)
11213 * @see Character#isUnicodeIdentifierStart(int)
11214 * @since 1.5
11215 */
11216 public static boolean isUnicodeIdentifierPart(int codePoint) {
11217 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
11218 }
11219
11220 /**
11221 * Determines if the specified character should be regarded as
11222 * an ignorable character in a Java identifier or a Unicode identifier.
11223 * <p>
11224 * The following Unicode characters are ignorable in a Java identifier
11225 * or a Unicode identifier:
11226 * <ul>
11227 * <li>ISO control characters that are not whitespace
11228 * <ul>
11229 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
11230 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
11231 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
11232 * </ul>
11233 *
11234 * <li>all characters that have the {@code FORMAT} general
11235 * category value
11236 * </ul>
11237 *
11238 * <p><b>Note:</b> This method cannot handle <a
11239 * href="#supplementary"> supplementary characters</a>. To support
11240 * all Unicode characters, including supplementary characters, use
11241 * the {@link #isIdentifierIgnorable(int)} method.
11242 *
11243 * @param ch the character to be tested.
11244 * @return {@code true} if the character is an ignorable control
11245 * character that may be part of a Java or Unicode identifier;
11246 * {@code false} otherwise.
11247 * @see Character#isJavaIdentifierPart(char)
11248 * @see Character#isUnicodeIdentifierPart(char)
11249 * @since 1.1
11250 */
11251 public static boolean isIdentifierIgnorable(char ch) {
11252 return isIdentifierIgnorable((int)ch);
11253 }
11254
11255 /**
11256 * Determines if the specified character (Unicode code point) should be regarded as
11257 * an ignorable character in a Java identifier or a Unicode identifier.
11258 * <p>
11259 * The following Unicode characters are ignorable in a Java identifier
11260 * or a Unicode identifier:
11261 * <ul>
11262 * <li>ISO control characters that are not whitespace
11263 * <ul>
11264 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
11265 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
11266 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
11267 * </ul>
11268 *
11269 * <li>all characters that have the {@code FORMAT} general
11270 * category value
11271 * </ul>
11272 *
11273 * @param codePoint the character (Unicode code point) to be tested.
11274 * @return {@code true} if the character is an ignorable control
11275 * character that may be part of a Java or Unicode identifier;
11276 * {@code false} otherwise.
11277 * @see Character#isJavaIdentifierPart(int)
11278 * @see Character#isUnicodeIdentifierPart(int)
11279 * @since 1.5
11280 */
11281 public static boolean isIdentifierIgnorable(int codePoint) {
11282 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
11283 }
11284
11285 /**
11286 * Determines if the specified character (Unicode code point) is an Emoji.
11287 * <p>
11288 * A character is considered to be an Emoji if and only if it has the {@code Emoji}
11289 * property, defined in
11290 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11291 * Unicode Emoji (Technical Standard #51)</a>.
11292 *
11293 * @param codePoint the character (Unicode code point) to be tested.
11294 * @return {@code true} if the character is an Emoji;
11295 * {@code false} otherwise.
11296 * @spec https://www.unicode.org/reports/tr51/ Unicode Emoji
11297 * @since 21
11298 */
11299 public static boolean isEmoji(int codePoint) {
11300 return CharacterData.of(codePoint).isEmoji(codePoint);
11301 }
11302
11303 /**
11304 * Determines if the specified character (Unicode code point) has the
11305 * Emoji Presentation property by default.
11306 * <p>
11307 * A character is considered to have the Emoji Presentation property if and
11308 * only if it has the {@code Emoji_Presentation} property, defined in
11309 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11310 * Unicode Emoji (Technical Standard #51)</a>.
11311 *
11312 * @param codePoint the character (Unicode code point) to be tested.
11313 * @return {@code true} if the character has the Emoji Presentation
11314 * property; {@code false} otherwise.
11315 * @spec https://www.unicode.org/reports/tr51/ Unicode Emoji
11316 * @since 21
11317 */
11318 public static boolean isEmojiPresentation(int codePoint) {
11319 return CharacterData.of(codePoint).isEmojiPresentation(codePoint);
11320 }
11321
11322 /**
11323 * Determines if the specified character (Unicode code point) is an
11324 * Emoji Modifier.
11325 * <p>
11326 * A character is considered to be an Emoji Modifier if and only if it has
11327 * the {@code Emoji_Modifier} property, defined in
11328 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11329 * Unicode Emoji (Technical Standard #51)</a>.
11330 *
11331 * @param codePoint the character (Unicode code point) to be tested.
11332 * @return {@code true} if the character is an Emoji Modifier;
11333 * {@code false} otherwise.
11334 * @spec https://www.unicode.org/reports/tr51/ Unicode Emoji
11335 * @since 21
11336 */
11337 public static boolean isEmojiModifier(int codePoint) {
11338 return CharacterData.of(codePoint).isEmojiModifier(codePoint);
11339 }
11340
11341 /**
11342 * Determines if the specified character (Unicode code point) is an
11343 * Emoji Modifier Base.
11344 * <p>
11345 * A character is considered to be an Emoji Modifier Base if and only if it has
11346 * the {@code Emoji_Modifier_Base} property, defined in
11347 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11348 * Unicode Emoji (Technical Standard #51)</a>.
11349 *
11350 * @param codePoint the character (Unicode code point) to be tested.
11351 * @return {@code true} if the character is an Emoji Modifier Base;
11352 * {@code false} otherwise.
11353 * @spec https://www.unicode.org/reports/tr51/ Unicode Emoji
11354 * @since 21
11355 */
11356 public static boolean isEmojiModifierBase(int codePoint) {
11357 return CharacterData.of(codePoint).isEmojiModifierBase(codePoint);
11358 }
11359
11360 /**
11361 * Determines if the specified character (Unicode code point) is an
11362 * Emoji Component.
11363 * <p>
11364 * A character is considered to be an Emoji Component if and only if it has
11365 * the {@code Emoji_Component} property, defined in
11366 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11367 * Unicode Emoji (Technical Standard #51)</a>.
11368 *
11369 * @param codePoint the character (Unicode code point) to be tested.
11370 * @return {@code true} if the character is an Emoji Component;
11371 * {@code false} otherwise.
11372 * @spec https://www.unicode.org/reports/tr51/ Unicode Emoji
11373 * @since 21
11374 */
11375 public static boolean isEmojiComponent(int codePoint) {
11376 return CharacterData.of(codePoint).isEmojiComponent(codePoint);
11377 }
11378
11379 /**
11380 * Determines if the specified character (Unicode code point) is
11381 * an Extended Pictographic.
11382 * <p>
11383 * A character is considered to be an Extended Pictographic if and only if it has
11384 * the {@code Extended_Pictographic} property, defined in
11385 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11386 * Unicode Emoji (Technical Standard #51)</a>.
11387 *
11388 * @param codePoint the character (Unicode code point) to be tested.
11389 * @return {@code true} if the character is an Extended Pictographic;
11390 * {@code false} otherwise.
11391 * @spec https://www.unicode.org/reports/tr51/ Unicode Emoji
11392 * @since 21
11393 */
11394 public static boolean isExtendedPictographic(int codePoint) {
11395 return CharacterData.of(codePoint).isExtendedPictographic(codePoint);
11396 }
11397
11398 /**
11399 * Converts the character argument to lowercase using case
11400 * mapping information from the UnicodeData file.
11401 * <p>
11402 * Note that
11403 * {@code Character.isLowerCase(Character.toLowerCase(ch))}
11404 * does not always return {@code true} for some ranges of
11405 * characters, particularly those that are symbols or ideographs.
11406 *
11407 * <p>In general, {@link String#toLowerCase()} should be used to map
11408 * characters to lowercase. {@code String} case mapping methods
11409 * have several benefits over {@code Character} case mapping methods.
11410 * {@code String} case mapping methods can perform locale-sensitive
11411 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11412 * the {@code Character} case mapping methods cannot.
11413 *
11414 * <p><b>Note:</b> This method cannot handle <a
11415 * href="#supplementary"> supplementary characters</a>. To support
11416 * all Unicode characters, including supplementary characters, use
11417 * the {@link #toLowerCase(int)} method.
11418 *
11419 * @param ch the character to be converted.
11420 * @return the lowercase equivalent of the character, if any;
11421 * otherwise, the character itself.
11422 * @see Character#isLowerCase(char)
11423 * @see String#toLowerCase()
11424 */
11425 public static char toLowerCase(char ch) {
11426 return (char)toLowerCase((int)ch);
11427 }
11428
11429 /**
11430 * Converts the character (Unicode code point) argument to
11431 * lowercase using case mapping information from the UnicodeData
11432 * file.
11433 *
11434 * <p> Note that
11435 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
11436 * does not always return {@code true} for some ranges of
11437 * characters, particularly those that are symbols or ideographs.
11438 *
11439 * <p>In general, {@link String#toLowerCase()} should be used to map
11440 * characters to lowercase. {@code String} case mapping methods
11441 * have several benefits over {@code Character} case mapping methods.
11442 * {@code String} case mapping methods can perform locale-sensitive
11443 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11444 * the {@code Character} case mapping methods cannot.
11445 *
11446 * @param codePoint the character (Unicode code point) to be converted.
11447 * @return the lowercase equivalent of the character (Unicode code
11448 * point), if any; otherwise, the character itself.
11449 * @see Character#isLowerCase(int)
11450 * @see String#toLowerCase()
11451 *
11452 * @since 1.5
11453 */
11454 public static int toLowerCase(int codePoint) {
11455 return CharacterData.of(codePoint).toLowerCase(codePoint);
11456 }
11457
11458 /**
11459 * Converts the character argument to uppercase using case mapping
11460 * information from the UnicodeData file.
11461 * <p>
11462 * Note that
11463 * {@code Character.isUpperCase(Character.toUpperCase(ch))}
11464 * does not always return {@code true} for some ranges of
11465 * characters, particularly those that are symbols or ideographs.
11466 *
11467 * <p>In general, {@link String#toUpperCase()} should be used to map
11468 * characters to uppercase. {@code String} case mapping methods
11469 * have several benefits over {@code Character} case mapping methods.
11470 * {@code String} case mapping methods can perform locale-sensitive
11471 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11472 * the {@code Character} case mapping methods cannot.
11473 *
11474 * <p><b>Note:</b> This method cannot handle <a
11475 * href="#supplementary"> supplementary characters</a>. To support
11476 * all Unicode characters, including supplementary characters, use
11477 * the {@link #toUpperCase(int)} method.
11478 *
11479 * @param ch the character to be converted.
11480 * @return the uppercase equivalent of the character, if any;
11481 * otherwise, the character itself.
11482 * @see Character#isUpperCase(char)
11483 * @see String#toUpperCase()
11484 */
11485 public static char toUpperCase(char ch) {
11486 return (char)toUpperCase((int)ch);
11487 }
11488
11489 /**
11490 * Converts the character (Unicode code point) argument to
11491 * uppercase using case mapping information from the UnicodeData
11492 * file.
11493 *
11494 * <p>Note that
11495 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
11496 * does not always return {@code true} for some ranges of
11497 * characters, particularly those that are symbols or ideographs.
11498 *
11499 * <p>In general, {@link String#toUpperCase()} should be used to map
11500 * characters to uppercase. {@code String} case mapping methods
11501 * have several benefits over {@code Character} case mapping methods.
11502 * {@code String} case mapping methods can perform locale-sensitive
11503 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11504 * the {@code Character} case mapping methods cannot.
11505 *
11506 * @param codePoint the character (Unicode code point) to be converted.
11507 * @return the uppercase equivalent of the character, if any;
11508 * otherwise, the character itself.
11509 * @see Character#isUpperCase(int)
11510 * @see String#toUpperCase()
11511 *
11512 * @since 1.5
11513 */
11514 public static int toUpperCase(int codePoint) {
11515 return CharacterData.of(codePoint).toUpperCase(codePoint);
11516 }
11517
11518 /**
11519 * Converts the character argument to titlecase using case mapping
11520 * information from the UnicodeData file. If a character has no
11521 * explicit titlecase mapping and is not itself a titlecase char
11522 * according to UnicodeData, then the uppercase mapping is
11523 * returned as an equivalent titlecase mapping. If the
11524 * {@code char} argument is already a titlecase
11525 * {@code char}, the same {@code char} value will be
11526 * returned.
11527 * <p>
11528 * Note that
11529 * {@code Character.isTitleCase(Character.toTitleCase(ch))}
11530 * does not always return {@code true} for some ranges of
11531 * characters.
11532 *
11533 * <p><b>Note:</b> This method cannot handle <a
11534 * href="#supplementary"> supplementary characters</a>. To support
11535 * all Unicode characters, including supplementary characters, use
11536 * the {@link #toTitleCase(int)} method.
11537 *
11538 * @param ch the character to be converted.
11539 * @return the titlecase equivalent of the character, if any;
11540 * otherwise, the character itself.
11541 * @see Character#isTitleCase(char)
11542 * @see Character#toLowerCase(char)
11543 * @see Character#toUpperCase(char)
11544 * @since 1.0.2
11545 */
11546 public static char toTitleCase(char ch) {
11547 return (char)toTitleCase((int)ch);
11548 }
11549
11550 /**
11551 * Converts the character (Unicode code point) argument to titlecase using case mapping
11552 * information from the UnicodeData file. If a character has no
11553 * explicit titlecase mapping and is not itself a titlecase char
11554 * according to UnicodeData, then the uppercase mapping is
11555 * returned as an equivalent titlecase mapping. If the
11556 * character argument is already a titlecase
11557 * character, the same character value will be
11558 * returned.
11559 *
11560 * <p>Note that
11561 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
11562 * does not always return {@code true} for some ranges of
11563 * characters.
11564 *
11565 * @param codePoint the character (Unicode code point) to be converted.
11566 * @return the titlecase equivalent of the character, if any;
11567 * otherwise, the character itself.
11568 * @see Character#isTitleCase(int)
11569 * @see Character#toLowerCase(int)
11570 * @see Character#toUpperCase(int)
11571 * @since 1.5
11572 */
11573 public static int toTitleCase(int codePoint) {
11574 return CharacterData.of(codePoint).toTitleCase(codePoint);
11575 }
11576
11577 /**
11578 * Returns the numeric value of the character {@code ch} in the
11579 * specified radix.
11580 * <p>
11581 * If the radix is not in the range {@code MIN_RADIX} ≤
11582 * {@code radix} ≤ {@code MAX_RADIX} or if the
11583 * value of {@code ch} is not a valid digit in the specified
11584 * radix, {@code -1} is returned. A character is a valid digit
11585 * if at least one of the following is true:
11586 * <ul>
11587 * <li>The method {@code isDigit} is {@code true} of the character
11588 * and the Unicode decimal digit value of the character (or its
11589 * single-character decomposition) is less than the specified radix.
11590 * In this case the decimal digit value is returned.
11591 * <li>The character is one of the uppercase Latin letters
11592 * {@code 'A'} through {@code 'Z'} and its code is less than
11593 * {@code radix + 'A' - 10}.
11594 * In this case, {@code ch - 'A' + 10}
11595 * is returned.
11596 * <li>The character is one of the lowercase Latin letters
11597 * {@code 'a'} through {@code 'z'} and its code is less than
11598 * {@code radix + 'a' - 10}.
11599 * In this case, {@code ch - 'a' + 10}
11600 * is returned.
11601 * <li>The character is one of the fullwidth uppercase Latin letters A
11602 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
11603 * and its code is less than
11604 * {@code radix + '\u005CuFF21' - 10}.
11605 * In this case, {@code ch - '\u005CuFF21' + 10}
11606 * is returned.
11607 * <li>The character is one of the fullwidth lowercase Latin letters a
11608 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
11609 * and its code is less than
11610 * {@code radix + '\u005CuFF41' - 10}.
11611 * In this case, {@code ch - '\u005CuFF41' + 10}
11612 * is returned.
11613 * </ul>
11614 *
11615 * <p><b>Note:</b> This method cannot handle <a
11616 * href="#supplementary"> supplementary characters</a>. To support
11617 * all Unicode characters, including supplementary characters, use
11618 * the {@link #digit(int, int)} method.
11619 *
11620 * @param ch the character to be converted.
11621 * @param radix the radix.
11622 * @return the numeric value represented by the character in the
11623 * specified radix.
11624 * @see Character#forDigit(int, int)
11625 * @see Character#isDigit(char)
11626 */
11627 public static int digit(char ch, int radix) {
11628 return digit((int)ch, radix);
11629 }
11630
11631 /**
11632 * Returns the numeric value of the specified character (Unicode
11633 * code point) in the specified radix.
11634 *
11635 * <p>If the radix is not in the range {@code MIN_RADIX} ≤
11636 * {@code radix} ≤ {@code MAX_RADIX} or if the
11637 * character is not a valid digit in the specified
11638 * radix, {@code -1} is returned. A character is a valid digit
11639 * if at least one of the following is true:
11640 * <ul>
11641 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
11642 * and the Unicode decimal digit value of the character (or its
11643 * single-character decomposition) is less than the specified radix.
11644 * In this case the decimal digit value is returned.
11645 * <li>The character is one of the uppercase Latin letters
11646 * {@code 'A'} through {@code 'Z'} and its code is less than
11647 * {@code radix + 'A' - 10}.
11648 * In this case, {@code codePoint - 'A' + 10}
11649 * is returned.
11650 * <li>The character is one of the lowercase Latin letters
11651 * {@code 'a'} through {@code 'z'} and its code is less than
11652 * {@code radix + 'a' - 10}.
11653 * In this case, {@code codePoint - 'a' + 10}
11654 * is returned.
11655 * <li>The character is one of the fullwidth uppercase Latin letters A
11656 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
11657 * and its code is less than
11658 * {@code radix + '\u005CuFF21' - 10}.
11659 * In this case,
11660 * {@code codePoint - '\u005CuFF21' + 10}
11661 * is returned.
11662 * <li>The character is one of the fullwidth lowercase Latin letters a
11663 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
11664 * and its code is less than
11665 * {@code radix + '\u005CuFF41'- 10}.
11666 * In this case,
11667 * {@code codePoint - '\u005CuFF41' + 10}
11668 * is returned.
11669 * </ul>
11670 *
11671 * @param codePoint the character (Unicode code point) to be converted.
11672 * @param radix the radix.
11673 * @return the numeric value represented by the character in the
11674 * specified radix.
11675 * @see Character#forDigit(int, int)
11676 * @see Character#isDigit(int)
11677 * @since 1.5
11678 */
11679 public static int digit(int codePoint, int radix) {
11680 return CharacterData.of(codePoint).digit(codePoint, radix);
11681 }
11682
11683 /**
11684 * Returns the {@code int} value that the specified Unicode
11685 * character represents. For example, the character
11686 * {@code '\u005Cu216C'} (the roman numeral fifty) will return
11687 * an int with a value of 50.
11688 * <p>
11689 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
11690 * {@code '\u005Cu005A'}), lowercase
11691 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
11692 * full width variant ({@code '\u005CuFF21'} through
11693 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
11694 * {@code '\u005CuFF5A'}) forms have numeric values from 10
11695 * through 35. This is independent of the Unicode specification,
11696 * which does not assign numeric values to these {@code char}
11697 * values.
11698 * <p>
11699 * If the character does not have a numeric value, then -1 is returned.
11700 * If the character has a numeric value that cannot be represented as a
11701 * nonnegative integer (for example, a fractional value), then -2
11702 * is returned.
11703 *
11704 * <p><b>Note:</b> This method cannot handle <a
11705 * href="#supplementary"> supplementary characters</a>. To support
11706 * all Unicode characters, including supplementary characters, use
11707 * the {@link #getNumericValue(int)} method.
11708 *
11709 * @param ch the character to be converted.
11710 * @return the numeric value of the character, as a nonnegative {@code int}
11711 * value; -2 if the character has a numeric value but the value
11712 * can not be represented as a nonnegative {@code int} value;
11713 * -1 if the character has no numeric value.
11714 * @see Character#forDigit(int, int)
11715 * @see Character#isDigit(char)
11716 * @since 1.1
11717 */
11718 public static int getNumericValue(char ch) {
11719 return getNumericValue((int)ch);
11720 }
11721
11722 /**
11723 * Returns the {@code int} value that the specified
11724 * character (Unicode code point) represents. For example, the character
11725 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
11726 * an {@code int} with a value of 50.
11727 * <p>
11728 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
11729 * {@code '\u005Cu005A'}), lowercase
11730 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
11731 * full width variant ({@code '\u005CuFF21'} through
11732 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
11733 * {@code '\u005CuFF5A'}) forms have numeric values from 10
11734 * through 35. This is independent of the Unicode specification,
11735 * which does not assign numeric values to these {@code char}
11736 * values.
11737 * <p>
11738 * If the character does not have a numeric value, then -1 is returned.
11739 * If the character has a numeric value that cannot be represented as a
11740 * nonnegative integer (for example, a fractional value), then -2
11741 * is returned.
11742 *
11743 * @param codePoint the character (Unicode code point) to be converted.
11744 * @return the numeric value of the character, as a nonnegative {@code int}
11745 * value; -2 if the character has a numeric value but the value
11746 * can not be represented as a nonnegative {@code int} value;
11747 * -1 if the character has no numeric value.
11748 * @see Character#forDigit(int, int)
11749 * @see Character#isDigit(int)
11750 * @since 1.5
11751 */
11752 public static int getNumericValue(int codePoint) {
11753 return CharacterData.of(codePoint).getNumericValue(codePoint);
11754 }
11755
11756 /**
11757 * Determines if the specified character is ISO-LATIN-1 white space.
11758 * This method returns {@code true} for the following five
11759 * characters only:
11760 * <table class="striped">
11761 * <caption style="display:none">truechars</caption>
11762 * <thead>
11763 * <tr><th scope="col">Character
11764 * <th scope="col">Code
11765 * <th scope="col">Name
11766 * </thead>
11767 * <tbody>
11768 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td>
11769 * <td>{@code HORIZONTAL TABULATION}</td></tr>
11770 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td>
11771 * <td>{@code NEW LINE}</td></tr>
11772 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td>
11773 * <td>{@code FORM FEED}</td></tr>
11774 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td>
11775 * <td>{@code CARRIAGE RETURN}</td></tr>
11776 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td>
11777 * <td>{@code SPACE}</td></tr>
11778 * </tbody>
11779 * </table>
11780 *
11781 * @param ch the character to be tested.
11782 * @return {@code true} if the character is ISO-LATIN-1 white
11783 * space; {@code false} otherwise.
11784 * @see Character#isSpaceChar(char)
11785 * @see Character#isWhitespace(char)
11786 * @deprecated Replaced by isWhitespace(char).
11787 */
11788 @Deprecated(since="1.1")
11789 public static boolean isSpace(char ch) {
11790 return (ch <= 0x0020) &&
11791 (((((1L << 0x0009) |
11792 (1L << 0x000A) |
11793 (1L << 0x000C) |
11794 (1L << 0x000D) |
11795 (1L << 0x0020)) >> ch) & 1L) != 0);
11796 }
11797
11798
11799 /**
11800 * Determines if the specified character is a Unicode space character.
11801 * A character is considered to be a space character if and only if
11802 * it is specified to be a space character by the Unicode Standard. This
11803 * method returns true if the character's general category type is any of
11804 * the following:
11805 * <ul>
11806 * <li> {@code SPACE_SEPARATOR}
11807 * <li> {@code LINE_SEPARATOR}
11808 * <li> {@code PARAGRAPH_SEPARATOR}
11809 * </ul>
11810 *
11811 * <p><b>Note:</b> This method cannot handle <a
11812 * href="#supplementary"> supplementary characters</a>. To support
11813 * all Unicode characters, including supplementary characters, use
11814 * the {@link #isSpaceChar(int)} method.
11815 *
11816 * @param ch the character to be tested.
11817 * @return {@code true} if the character is a space character;
11818 * {@code false} otherwise.
11819 * @see Character#isWhitespace(char)
11820 * @since 1.1
11821 */
11822 public static boolean isSpaceChar(char ch) {
11823 return isSpaceChar((int)ch);
11824 }
11825
11826 /**
11827 * Determines if the specified character (Unicode code point) is a
11828 * Unicode space character. A character is considered to be a
11829 * space character if and only if it is specified to be a space
11830 * character by the Unicode Standard. This method returns true if
11831 * the character's general category type is any of the following:
11832 *
11833 * <ul>
11834 * <li> {@link #SPACE_SEPARATOR}
11835 * <li> {@link #LINE_SEPARATOR}
11836 * <li> {@link #PARAGRAPH_SEPARATOR}
11837 * </ul>
11838 *
11839 * @param codePoint the character (Unicode code point) to be tested.
11840 * @return {@code true} if the character is a space character;
11841 * {@code false} otherwise.
11842 * @see Character#isWhitespace(int)
11843 * @since 1.5
11844 */
11845 public static boolean isSpaceChar(int codePoint) {
11846 return ((((1 << Character.SPACE_SEPARATOR) |
11847 (1 << Character.LINE_SEPARATOR) |
11848 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
11849 != 0;
11850 }
11851
11852 /**
11853 * Determines if the specified character is white space according to Java.
11854 * A character is a Java whitespace character if and only if it satisfies
11855 * one of the following criteria:
11856 * <ul>
11857 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
11858 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
11859 * but is not also a non-breaking space ({@code '\u005Cu00A0'},
11860 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
11861 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
11862 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
11863 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
11864 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
11865 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
11866 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
11867 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
11868 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
11869 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
11870 * </ul>
11871 *
11872 * <p><b>Note:</b> This method cannot handle <a
11873 * href="#supplementary"> supplementary characters</a>. To support
11874 * all Unicode characters, including supplementary characters, use
11875 * the {@link #isWhitespace(int)} method.
11876 *
11877 * @param ch the character to be tested.
11878 * @return {@code true} if the character is a Java whitespace
11879 * character; {@code false} otherwise.
11880 * @see Character#isSpaceChar(char)
11881 * @since 1.1
11882 */
11883 public static boolean isWhitespace(char ch) {
11884 return isWhitespace((int)ch);
11885 }
11886
11887 /**
11888 * Determines if the specified character (Unicode code point) is
11889 * white space according to Java. A character is a Java
11890 * whitespace character if and only if it satisfies one of the
11891 * following criteria:
11892 * <ul>
11893 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
11894 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
11895 * but is not also a non-breaking space ({@code '\u005Cu00A0'},
11896 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
11897 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
11898 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
11899 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
11900 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
11901 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
11902 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
11903 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
11904 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
11905 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
11906 * </ul>
11907 *
11908 * @param codePoint the character (Unicode code point) to be tested.
11909 * @return {@code true} if the character is a Java whitespace
11910 * character; {@code false} otherwise.
11911 * @see Character#isSpaceChar(int)
11912 * @since 1.5
11913 */
11914 public static boolean isWhitespace(int codePoint) {
11915 return CharacterData.of(codePoint).isWhitespace(codePoint);
11916 }
11917
11918 /**
11919 * Determines if the specified character is an ISO control
11920 * character. A character is considered to be an ISO control
11921 * character if its code is in the range {@code '\u005Cu0000'}
11922 * through {@code '\u005Cu001F'} or in the range
11923 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
11924 *
11925 * <p><b>Note:</b> This method cannot handle <a
11926 * href="#supplementary"> supplementary characters</a>. To support
11927 * all Unicode characters, including supplementary characters, use
11928 * the {@link #isISOControl(int)} method.
11929 *
11930 * @param ch the character to be tested.
11931 * @return {@code true} if the character is an ISO control character;
11932 * {@code false} otherwise.
11933 *
11934 * @see Character#isSpaceChar(char)
11935 * @see Character#isWhitespace(char)
11936 * @since 1.1
11937 */
11938 public static boolean isISOControl(char ch) {
11939 return isISOControl((int)ch);
11940 }
11941
11942 /**
11943 * Determines if the referenced character (Unicode code point) is an ISO control
11944 * character. A character is considered to be an ISO control
11945 * character if its code is in the range {@code '\u005Cu0000'}
11946 * through {@code '\u005Cu001F'} or in the range
11947 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
11948 *
11949 * @param codePoint the character (Unicode code point) to be tested.
11950 * @return {@code true} if the character is an ISO control character;
11951 * {@code false} otherwise.
11952 * @see Character#isSpaceChar(int)
11953 * @see Character#isWhitespace(int)
11954 * @since 1.5
11955 */
11956 public static boolean isISOControl(int codePoint) {
11957 // Optimized form of:
11958 // (codePoint >= 0x00 && codePoint <= 0x1F) ||
11959 // (codePoint >= 0x7F && codePoint <= 0x9F);
11960 return codePoint <= 0x9F &&
11961 (codePoint >= 0x7F || (codePoint >>> 5 == 0));
11962 }
11963
11964 /**
11965 * Returns a value indicating a character's general category.
11966 *
11967 * <p><b>Note:</b> This method cannot handle <a
11968 * href="#supplementary"> supplementary characters</a>. To support
11969 * all Unicode characters, including supplementary characters, use
11970 * the {@link #getType(int)} method.
11971 *
11972 * @param ch the character to be tested.
11973 * @return a value of type {@code int} representing the
11974 * character's general category.
11975 * @see Character#COMBINING_SPACING_MARK
11976 * @see Character#CONNECTOR_PUNCTUATION
11977 * @see Character#CONTROL
11978 * @see Character#CURRENCY_SYMBOL
11979 * @see Character#DASH_PUNCTUATION
11980 * @see Character#DECIMAL_DIGIT_NUMBER
11981 * @see Character#ENCLOSING_MARK
11982 * @see Character#END_PUNCTUATION
11983 * @see Character#FINAL_QUOTE_PUNCTUATION
11984 * @see Character#FORMAT
11985 * @see Character#INITIAL_QUOTE_PUNCTUATION
11986 * @see Character#LETTER_NUMBER
11987 * @see Character#LINE_SEPARATOR
11988 * @see Character#LOWERCASE_LETTER
11989 * @see Character#MATH_SYMBOL
11990 * @see Character#MODIFIER_LETTER
11991 * @see Character#MODIFIER_SYMBOL
11992 * @see Character#NON_SPACING_MARK
11993 * @see Character#OTHER_LETTER
11994 * @see Character#OTHER_NUMBER
11995 * @see Character#OTHER_PUNCTUATION
11996 * @see Character#OTHER_SYMBOL
11997 * @see Character#PARAGRAPH_SEPARATOR
11998 * @see Character#PRIVATE_USE
11999 * @see Character#SPACE_SEPARATOR
12000 * @see Character#START_PUNCTUATION
12001 * @see Character#SURROGATE
12002 * @see Character#TITLECASE_LETTER
12003 * @see Character#UNASSIGNED
12004 * @see Character#UPPERCASE_LETTER
12005 * @since 1.1
12006 */
12007 public static int getType(char ch) {
12008 return getType((int)ch);
12009 }
12010
12011 /**
12012 * Returns a value indicating a character's general category.
12013 *
12014 * @param codePoint the character (Unicode code point) to be tested.
12015 * @return a value of type {@code int} representing the
12016 * character's general category.
12017 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
12018 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
12019 * @see Character#CONTROL CONTROL
12020 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
12021 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION
12022 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
12023 * @see Character#ENCLOSING_MARK ENCLOSING_MARK
12024 * @see Character#END_PUNCTUATION END_PUNCTUATION
12025 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
12026 * @see Character#FORMAT FORMAT
12027 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
12028 * @see Character#LETTER_NUMBER LETTER_NUMBER
12029 * @see Character#LINE_SEPARATOR LINE_SEPARATOR
12030 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER
12031 * @see Character#MATH_SYMBOL MATH_SYMBOL
12032 * @see Character#MODIFIER_LETTER MODIFIER_LETTER
12033 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
12034 * @see Character#NON_SPACING_MARK NON_SPACING_MARK
12035 * @see Character#OTHER_LETTER OTHER_LETTER
12036 * @see Character#OTHER_NUMBER OTHER_NUMBER
12037 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
12038 * @see Character#OTHER_SYMBOL OTHER_SYMBOL
12039 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
12040 * @see Character#PRIVATE_USE PRIVATE_USE
12041 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR
12042 * @see Character#START_PUNCTUATION START_PUNCTUATION
12043 * @see Character#SURROGATE SURROGATE
12044 * @see Character#TITLECASE_LETTER TITLECASE_LETTER
12045 * @see Character#UNASSIGNED UNASSIGNED
12046 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER
12047 * @since 1.5
12048 */
12049 public static int getType(int codePoint) {
12050 return CharacterData.of(codePoint).getType(codePoint);
12051 }
12052
12053 /**
12054 * Determines the character representation for a specific digit in
12055 * the specified radix. If the value of {@code radix} is not a
12056 * valid radix, or the value of {@code digit} is not a valid
12057 * digit in the specified radix, the null character
12058 * ({@code '\u005Cu0000'}) is returned.
12059 * <p>
12060 * The {@code radix} argument is valid if it is greater than or
12061 * equal to {@code MIN_RADIX} and less than or equal to
12062 * {@code MAX_RADIX}. The {@code digit} argument is valid if
12063 * {@code 0 <= digit < radix}.
12064 * <p>
12065 * If the digit is less than 10, then
12066 * {@code '0' + digit} is returned. Otherwise, the value
12067 * {@code 'a' + digit - 10} is returned.
12068 *
12069 * @param digit the number to convert to a character.
12070 * @param radix the radix.
12071 * @return the {@code char} representation of the specified digit
12072 * in the specified radix.
12073 * @see Character#MIN_RADIX
12074 * @see Character#MAX_RADIX
12075 * @see Character#digit(char, int)
12076 */
12077 public static char forDigit(int digit, int radix) {
12078 if ((digit >= radix) || (digit < 0)) {
12079 return '\0';
12080 }
12081 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
12082 return '\0';
12083 }
12084 if (digit < 10) {
12085 return (char)('0' + digit);
12086 }
12087 return (char)('a' - 10 + digit);
12088 }
12089
12090 /**
12091 * Returns the Unicode directionality property for the given
12092 * character. Character directionality is used to calculate the
12093 * visual ordering of text. The directionality value of undefined
12094 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
12095 *
12096 * <p><b>Note:</b> This method cannot handle <a
12097 * href="#supplementary"> supplementary characters</a>. To support
12098 * all Unicode characters, including supplementary characters, use
12099 * the {@link #getDirectionality(int)} method.
12100 *
12101 * @param ch {@code char} for which the directionality property
12102 * is requested.
12103 * @return the directionality property of the {@code char} value.
12104 *
12105 * @see Character#DIRECTIONALITY_UNDEFINED
12106 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
12107 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
12108 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
12109 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
12110 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
12111 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
12112 * @see Character#DIRECTIONALITY_ARABIC_NUMBER
12113 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
12114 * @see Character#DIRECTIONALITY_NONSPACING_MARK
12115 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
12116 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
12117 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
12118 * @see Character#DIRECTIONALITY_WHITESPACE
12119 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
12120 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
12121 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
12122 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
12123 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
12124 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
12125 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
12126 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
12127 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE
12128 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
12129 * @since 1.4
12130 */
12131 public static byte getDirectionality(char ch) {
12132 return getDirectionality((int)ch);
12133 }
12134
12135 /**
12136 * Returns the Unicode directionality property for the given
12137 * character (Unicode code point). Character directionality is
12138 * used to calculate the visual ordering of text. The
     * directionality value of an undefined character is {@link
12140 * #DIRECTIONALITY_UNDEFINED}.
12141 *
12142 * @param codePoint the character (Unicode code point) for which
12143 * the directionality property is requested.
12144 * @return the directionality property of the character.
12145 *
12146 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
12147 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
12148 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
12149 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
12150 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
12151 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
12152 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
12153 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
12154 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
12155 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
12156 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
12157 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
12158 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
12159 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
12160 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
12161 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
12162 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
12163 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
12164 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
12165 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
12166 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
12167 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
12168 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE
12169 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
12170 * @since 1.5
12171 */
12172 public static byte getDirectionality(int codePoint) {
12173 return CharacterData.of(codePoint).getDirectionality(codePoint);
12174 }
12175
12176 /**
12177 * Determines whether the character is mirrored according to the
12178 * Unicode specification. Mirrored characters should have their
12179 * glyphs horizontally mirrored when displayed in text that is
12180 * right-to-left. For example, {@code '\u005Cu0028'} LEFT
12181 * PARENTHESIS is semantically defined to be an <i>opening
12182 * parenthesis</i>. This will appear as a "(" in text that is
12183 * left-to-right but as a ")" in text that is right-to-left.
12184 *
12185 * <p><b>Note:</b> This method cannot handle <a
12186 * href="#supplementary"> supplementary characters</a>. To support
12187 * all Unicode characters, including supplementary characters, use
12188 * the {@link #isMirrored(int)} method.
12189 *
12190 * @param ch {@code char} for which the mirrored property is requested
12191 * @return {@code true} if the char is mirrored, {@code false}
12192 * if the {@code char} is not mirrored or is not defined.
12193 * @since 1.4
12194 */
12195 public static boolean isMirrored(char ch) {
12196 return isMirrored((int)ch);
12197 }
12198
12199 /**
12200 * Determines whether the specified character (Unicode code point)
12201 * is mirrored according to the Unicode specification. Mirrored
12202 * characters should have their glyphs horizontally mirrored when
12203 * displayed in text that is right-to-left. For example,
12204 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
12205 * defined to be an <i>opening parenthesis</i>. This will appear
12206 * as a "(" in text that is left-to-right but as a ")" in text
12207 * that is right-to-left.
12208 *
12209 * @param codePoint the character (Unicode code point) to be tested.
12210 * @return {@code true} if the character is mirrored, {@code false}
12211 * if the character is not mirrored or is not defined.
12212 * @since 1.5
12213 */
12214 public static boolean isMirrored(int codePoint) {
12215 return CharacterData.of(codePoint).isMirrored(codePoint);
12216 }
12217
12218 /**
12219 * Compares two {@code Character} objects numerically.
12220 *
12221 * @param anotherCharacter the {@code Character} to be compared.
12222 * @return the value {@code 0} if the argument {@code Character}
12223 * is equal to this {@code Character}; a value less than
12224 * {@code 0} if this {@code Character} is numerically less
12225 * than the {@code Character} argument; and a value greater than
12226 * {@code 0} if this {@code Character} is numerically greater
12227 * than the {@code Character} argument (unsigned comparison).
12228 * Note that this is strictly a numerical comparison; it is not
12229 * locale-dependent.
12230 * @since 1.2
12231 */
12232 public int compareTo(Character anotherCharacter) {
12233 return compare(this.value, anotherCharacter.value);
12234 }
12235
12236 /**
12237 * Compares two {@code char} values numerically.
12238 * The value returned is identical to what would be returned by:
12239 * <pre>
12240 * Character.valueOf(x).compareTo(Character.valueOf(y))
12241 * </pre>
12242 *
12243 * @param x the first {@code char} to compare
12244 * @param y the second {@code char} to compare
12245 * @return the value {@code 0} if {@code x == y};
12246 * a value less than {@code 0} if {@code x < y}; and
12247 * a value greater than {@code 0} if {@code x > y}
12248 * @since 1.7
12249 */
12250 public static int compare(char x, char y) {
12251 return x - y;
12252 }
12253
12254 /**
12255 * Converts the character (Unicode code point) argument to uppercase using
12256 * information from the UnicodeData file.
12257 *
12258 * @param codePoint the character (Unicode code point) to be converted.
12259 * @return either the uppercase equivalent of the character, if
12260 * any, or an error flag ({@code Character.ERROR})
12261 * that indicates that a 1:M {@code char} mapping exists.
12262 * @see Character#isLowerCase(char)
12263 * @see Character#isUpperCase(char)
12264 * @see Character#toLowerCase(char)
12265 * @see Character#toTitleCase(char)
12266 * @since 1.4
12267 */
12268 static int toUpperCaseEx(int codePoint) {
12269 assert isValidCodePoint(codePoint);
12270 return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
12271 }
12272
12273 /**
12274 * Converts the character (Unicode code point) argument to uppercase using case
12275 * mapping information from the SpecialCasing file in the Unicode
12276 * specification. If a character has no explicit uppercase
12277 * mapping, then the {@code char} itself is returned in the
12278 * {@code char[]}.
12279 *
12280 * @param codePoint the character (Unicode code point) to be converted.
12281 * @return a {@code char[]} with the uppercased character.
12282 * @since 1.4
12283 */
12284 static char[] toUpperCaseCharArray(int codePoint) {
12285 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
12286 assert isBmpCodePoint(codePoint);
12287 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
12288 }
12289
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @see #BYTES
     * @since 1.5
     */
    public static final int SIZE = 16;
12297
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form. The value is computed as {@link #SIZE} divided by
     * {@link Byte#SIZE}.
     *
     * @see #SIZE
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
12305
12306 /**
12307 * Returns the value obtained by reversing the order of the bytes in the
12308 * specified {@code char} value.
12309 *
12310 * @param ch The {@code char} of which to reverse the byte order.
12311 * @return the value obtained by reversing (or, equivalently, swapping)
12312 * the bytes in the specified {@code char} value.
12313 * @since 1.5
12314 */
12315 @IntrinsicCandidate
12316 public static char reverseBytes(char ch) {
12317 return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
12318 }
12319
12320 /**
12321 * Returns the name of the specified character
12322 * {@code codePoint}, or null if the code point is
12323 * {@link #UNASSIGNED unassigned}.
12324 * <p>
12325 * If the specified character is not assigned a name by
12326 * the <i>UnicodeData</i> file (part of the Unicode Character
12327 * Database maintained by the Unicode Consortium), the returned
12328 * name is the same as the result of the expression:
12329 *
12330 * <blockquote>{@code
12331 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
12332 * + " "
12333 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12334 *
12335 * }</blockquote>
12336 *
12337 * For the {@code codePoint}s in the <i>UnicodeData</i> file, the name
12338 * returned by this method follows the naming scheme in the
12339 * "Unicode Name Property" section of the Unicode Standard. For other
     * code points, such as Hangul/Ideographs, the name generation rule above
12341 * differs from the one defined in the Unicode Standard.
12342 *
12343 * @param codePoint the character (Unicode code point)
12344 *
12345 * @return the name of the specified character, or null if
12346 * the code point is unassigned.
12347 *
12348 * @throws IllegalArgumentException if the specified
12349 * {@code codePoint} is not a valid Unicode
12350 * code point.
12351 *
12352 * @since 1.7
12353 */
12354 public static String getName(int codePoint) {
12355 if (!isValidCodePoint(codePoint)) {
12356 throw new IllegalArgumentException(
12357 String.format("Not a valid Unicode code point: 0x%X", codePoint));
12358 }
12359 String name = CharacterName.getInstance().getName(codePoint);
12360 if (name != null)
12361 return name;
12362 if (getType(codePoint) == UNASSIGNED)
12363 return null;
12364 UnicodeBlock block = UnicodeBlock.of(codePoint);
12365 if (block != null)
12366 return block.toString().replace('_', ' ') + " "
12367 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12368 // should never come here
12369 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12370 }
12371
12372 /**
12373 * Returns the code point value of the Unicode character specified by
12374 * the given character name.
12375 * <p>
12376 * If a character is not assigned a name by the <i>UnicodeData</i>
12377 * file (part of the Unicode Character Database maintained by the Unicode
12378 * Consortium), its name is defined as the result of the expression:
12379 *
12380 * <blockquote>{@code
12381 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
12382 * + " "
12383 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12384 *
12385 * }</blockquote>
12386 * <p>
12387 * The {@code name} matching is case insensitive, with any leading and
     * trailing whitespace characters removed.
12389 *
12390 * For the code points in the <i>UnicodeData</i> file, this method
12391 * recognizes the name which conforms to the name defined in the
12392 * "Unicode Name Property" section in the Unicode Standard. For other
12393 * code points, this method recognizes the name generated with
12394 * {@link #getName(int)} method.
12395 *
12396 * @param name the character name
12397 *
12398 * @return the code point value of the character specified by its name.
12399 *
12400 * @throws IllegalArgumentException if the specified {@code name}
12401 * is not a valid character name.
12402 * @throws NullPointerException if {@code name} is {@code null}
12403 *
12404 * @since 9
12405 */
12406 public static int codePointOf(String name) {
12407 name = name.trim().toUpperCase(Locale.ROOT);
12408 int cp = CharacterName.getInstance().getCodePoint(name);
12409 if (cp != -1)
12410 return cp;
12411 try {
12412 int off = name.lastIndexOf(' ');
12413 if (off != -1) {
12414 cp = Integer.parseInt(name, off + 1, name.length(), 16);
12415 if (isValidCodePoint(cp) && name.equals(getName(cp)))
12416 return cp;
12417 }
12418 } catch (Exception x) {}
12419 throw new IllegalArgumentException("Unrecognized character name :" + name);
12420 }
12421 }