1 /*
2 * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package java.lang;
27
28 import jdk.internal.misc.CDS;
29 import jdk.internal.misc.PreviewFeatures;
30 import jdk.internal.value.DeserializeConstructor;
31 import jdk.internal.vm.annotation.IntrinsicCandidate;
32 import jdk.internal.vm.annotation.Stable;
33
34 import java.lang.constant.Constable;
35 import java.lang.constant.DynamicConstantDesc;
36 import java.util.Arrays;
37 import java.util.HashMap;
38 import java.util.Locale;
39 import java.util.Map;
40 import java.util.Objects;
41 import java.util.Optional;
42
43 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST;
44 import static java.lang.constant.ConstantDescs.CD_char;
45 import static java.lang.constant.ConstantDescs.DEFAULT_NAME;
46
47 /**
48 * The {@code Character} class is the {@linkplain
49 * java.lang##wrapperClass wrapper class} for values of the primitive
50 * type {@code char}. An object of type {@code Character} contains a
51 * single field whose type is {@code char}.
52 *
53 * <p>In addition, this class provides a large number of static methods for
54 * determining a character's category (lowercase letter, digit, etc.)
55 * and for converting characters from uppercase to lowercase and vice
56 * versa.
57 *
58 * <h2><a id="conformance">Unicode Conformance</a></h2>
59 * <p>
60 * The fields and methods of class {@code Character} are defined in terms
61 * of character information from the Unicode Standard, specifically the
62 * <i>UnicodeData</i> file that is part of the Unicode Character Database.
63 * This file specifies properties including name and category for every
64 * assigned Unicode code point or character range. The file is available
65 * from the Unicode Consortium at
66 * <a href="http://www.unicode.org">http://www.unicode.org</a>.
67 * <p>
68 * Character information is based on the Unicode Standard, version 17.0.
69 * <p>
70 * The Java platform has supported different versions of the Unicode
71 * Standard over time. Upgrades to newer versions of the Unicode Standard
72 * occurred in the following Java releases, each indicating the new version:
73 * <table class="striped">
74 * <caption style="display:none">Shows Java releases and supported Unicode versions</caption>
75 * <thead>
76 * <tr><th scope="col">Java release</th>
77 * <th scope="col">Unicode version</th></tr>
78 * </thead>
79 * <tbody>
80 * <tr><th scope="row" style="text-align:left">Java SE 26</th>
81 * <td>Unicode 17.0</td></tr>
82 * <tr><th scope="row" style="text-align:left">Java SE 24</th>
83 * <td>Unicode 16.0</td></tr>
84 * <tr><th scope="row" style="text-align:left">Java SE 22</th>
85 * <td>Unicode 15.1</td></tr>
86 * <tr><th scope="row" style="text-align:left">Java SE 20</th>
87 * <td>Unicode 15.0</td></tr>
88 * <tr><th scope="row" style="text-align:left">Java SE 19</th>
89 * <td>Unicode 14.0</td></tr>
90 * <tr><th scope="row" style="text-align:left">Java SE 15</th>
91 * <td>Unicode 13.0</td></tr>
92 * <tr><th scope="row" style="text-align:left">Java SE 13</th>
93 * <td>Unicode 12.1</td></tr>
94 * <tr><th scope="row" style="text-align:left">Java SE 12</th>
95 * <td>Unicode 11.0</td></tr>
96 * <tr><th scope="row" style="text-align:left">Java SE 11</th>
97 * <td>Unicode 10.0</td></tr>
98 * <tr><th scope="row" style="text-align:left">Java SE 9</th>
99 * <td>Unicode 8.0</td></tr>
100 * <tr><th scope="row" style="text-align:left">Java SE 8</th>
101 * <td>Unicode 6.2</td></tr>
102 * <tr><th scope="row" style="text-align:left">Java SE 7</th>
103 * <td>Unicode 6.0</td></tr>
104 * <tr><th scope="row" style="text-align:left">Java SE 5.0</th>
105 * <td>Unicode 4.0</td></tr>
106 * <tr><th scope="row" style="text-align:left">Java SE 1.4</th>
107 * <td>Unicode 3.0</td></tr>
108 * <tr><th scope="row" style="text-align:left">JDK 1.1</th>
109 * <td>Unicode 2.0</td></tr>
110 * <tr><th scope="row" style="text-align:left">JDK 1.0.2</th>
111 * <td>Unicode 1.1.5</td></tr>
112 * </tbody>
113 * </table>
114 * Variations from these base Unicode versions, such as recognized appendixes,
115 * are documented elsewhere.
116 * <h2><a id="unicode">Unicode Character Representations</a></h2>
117 *
 * <p>The {@code char} data type (and therefore the value that a
 * {@code Character} object encapsulates) is based on the
120 * original Unicode specification, which defined characters as
121 * fixed-width 16-bit entities. The Unicode Standard has since been
122 * changed to allow for characters whose representation requires more
123 * than 16 bits. The range of legal <em>code point</em>s is now
124 * U+0000 to U+10FFFF, known as
125 * <em><a href="https://www.unicode.org/glossary/#unicode_scalar_value">
126 * Unicode scalar value</a></em>.
127 *
128 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is
129 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
130 * <a id="supplementary">Characters</a> whose code points are greater
131 * than U+FFFF are called <em>supplementary character</em>s. The Java
132 * platform uses the UTF-16 representation in {@code char} arrays and
133 * in the {@code String} and {@code StringBuffer} classes. In
134 * this representation, supplementary characters are represented as a pair
135 * of {@code char} values, the first from the <em>high-surrogates</em>
136 * range, (\uD800-\uDBFF), the second from the
137 * <em>low-surrogates</em> range (\uDC00-\uDFFF).
138 *
139 * <p>A {@code char} value, therefore, represents Basic
140 * Multilingual Plane (BMP) code points, including the surrogate
141 * code points, or code units of the UTF-16 encoding. An
142 * {@code int} value represents all Unicode code points,
143 * including supplementary code points. The lower (least significant)
144 * 21 bits of {@code int} are used to represent Unicode code
145 * points and the upper (most significant) 11 bits must be zero.
146 * Unless otherwise specified, the behavior with respect to
147 * supplementary characters and surrogate {@code char} values is
148 * as follows:
149 *
150 * <ul>
151 * <li>The methods that only accept a {@code char} value cannot support
152 * supplementary characters. They treat {@code char} values from the
153 * surrogate ranges as undefined characters. For example,
154 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
155 * this specific value if followed by any low-surrogate value in a string
156 * would represent a letter.
157 *
158 * <li>The methods that accept an {@code int} value support all
159 * Unicode characters, including supplementary characters. For
160 * example, {@code Character.isLetter(0x2F81A)} returns
161 * {@code true} because the code point value represents a letter
162 * (a CJK ideograph).
163 * </ul>
164 *
165 * <p>In the Java SE API documentation, <em>Unicode code point</em> is
166 * used for character values in the range between U+0000 and U+10FFFF,
167 * and <em>Unicode code unit</em> is used for 16-bit
168 * {@code char} values that are code units of the <em>UTF-16</em>
169 * encoding. For more information on Unicode terminology, refer to the
170 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
171 *
172 * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a>
173 * class; programmers should treat instances that are {@linkplain #equals(Object) equal}
174 * as interchangeable and should not use instances for synchronization, mutexes, or
175 * with {@linkplain java.lang.ref.Reference object references}.
176 *
177 * <div class="preview-block">
178 * <div class="preview-comment">
179 * When preview features are enabled, {@code Character} is a {@linkplain Class#isValue value class}.
180 * Use of value class instances for synchronization, mutexes, or with
 * {@linkplain java.lang.ref.Reference object references} results in
182 * {@link IdentityException}.
183 * </div>
184 * </div>
185 *
186 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
187 * @author Lee Boynton
188 * @author Guy Steele
189 * @author Akira Tanaka
190 * @author Martin Buchholz
191 * @author Ulf Zibis
192 * @since 1.0
193 */
194 @jdk.internal.MigratedValueClass
195 @jdk.internal.ValueBased
196 public final class Character implements java.io.Serializable, Comparable<Character>, Constable {
197 /**
198 * The minimum radix available for conversion to and from strings.
199 * The constant value of this field is the smallest value permitted
200 * for the radix argument in radix-conversion methods such as the
201 * {@code digit} method, the {@code forDigit} method, and the
202 * {@code toString} method of class {@code Integer}.
203 *
204 * @see Character#digit(char, int)
205 * @see Character#forDigit(int, int)
206 * @see Integer#toString(int, int)
207 * @see Integer#valueOf(String)
208 */
209 public static final int MIN_RADIX = 2;
210
211 /**
212 * The maximum radix available for conversion to and from strings.
213 * The constant value of this field is the largest value permitted
214 * for the radix argument in radix-conversion methods such as the
215 * {@code digit} method, the {@code forDigit} method, and the
216 * {@code toString} method of class {@code Integer}.
217 *
218 * @see Character#digit(char, int)
219 * @see Character#forDigit(int, int)
220 * @see Integer#toString(int, int)
221 * @see Integer#valueOf(String)
222 */
223 public static final int MAX_RADIX = 36;
224
225 /**
226 * The constant value of this field is the smallest value of type
227 * {@code char}, {@code '\u005Cu0000'}.
228 *
229 * @since 1.0.2
230 */
231 public static final char MIN_VALUE = '\u0000';
232
233 /**
234 * The constant value of this field is the largest value of type
235 * {@code char}, {@code '\u005CuFFFF'}.
236 *
237 * @since 1.0.2
238 */
239 public static final char MAX_VALUE = '\uFFFF';
240
241 /**
242 * The {@code Class} instance representing the primitive type
243 * {@code char}.
244 *
245 * @since 1.1
246 */
247 public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
248
249 /*
250 * Normative general types
251 */
252
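/*
 * General character types (Unicode general categories).
 * The category constants that follow, UNASSIGNED through
 * FINAL_QUOTE_PUNCTUATION, take the values 0 through 30, except that
 * no constant is assigned the value 17.
 */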
256
257 /**
258 * General category "Cn" in the Unicode specification.
259 * @since 1.1
260 */
261 public static final byte UNASSIGNED = 0;
262
263 /**
264 * General category "Lu" in the Unicode specification.
265 * @since 1.1
266 */
267 public static final byte UPPERCASE_LETTER = 1;
268
269 /**
270 * General category "Ll" in the Unicode specification.
271 * @since 1.1
272 */
273 public static final byte LOWERCASE_LETTER = 2;
274
275 /**
276 * General category "Lt" in the Unicode specification.
277 * @since 1.1
278 */
279 public static final byte TITLECASE_LETTER = 3;
280
281 /**
282 * General category "Lm" in the Unicode specification.
283 * @since 1.1
284 */
285 public static final byte MODIFIER_LETTER = 4;
286
287 /**
288 * General category "Lo" in the Unicode specification.
289 * @since 1.1
290 */
291 public static final byte OTHER_LETTER = 5;
292
293 /**
294 * General category "Mn" in the Unicode specification.
295 * @since 1.1
296 */
297 public static final byte NON_SPACING_MARK = 6;
298
299 /**
300 * General category "Me" in the Unicode specification.
301 * @since 1.1
302 */
303 public static final byte ENCLOSING_MARK = 7;
304
305 /**
306 * General category "Mc" in the Unicode specification.
307 * @since 1.1
308 */
309 public static final byte COMBINING_SPACING_MARK = 8;
310
311 /**
312 * General category "Nd" in the Unicode specification.
313 * @since 1.1
314 */
315 public static final byte DECIMAL_DIGIT_NUMBER = 9;
316
317 /**
318 * General category "Nl" in the Unicode specification.
319 * @since 1.1
320 */
321 public static final byte LETTER_NUMBER = 10;
322
323 /**
324 * General category "No" in the Unicode specification.
325 * @since 1.1
326 */
327 public static final byte OTHER_NUMBER = 11;
328
329 /**
330 * General category "Zs" in the Unicode specification.
331 * @since 1.1
332 */
333 public static final byte SPACE_SEPARATOR = 12;
334
335 /**
336 * General category "Zl" in the Unicode specification.
337 * @since 1.1
338 */
339 public static final byte LINE_SEPARATOR = 13;
340
341 /**
342 * General category "Zp" in the Unicode specification.
343 * @since 1.1
344 */
345 public static final byte PARAGRAPH_SEPARATOR = 14;
346
347 /**
348 * General category "Cc" in the Unicode specification.
349 * @since 1.1
350 */
351 public static final byte CONTROL = 15;
352
353 /**
354 * General category "Cf" in the Unicode specification.
355 * @since 1.1
356 */
357 public static final byte FORMAT = 16;
358
359 /**
360 * General category "Co" in the Unicode specification.
361 * @since 1.1
362 */
363 public static final byte PRIVATE_USE = 18;
364
365 /**
366 * General category "Cs" in the Unicode specification.
367 * @since 1.1
368 */
369 public static final byte SURROGATE = 19;
370
371 /**
372 * General category "Pd" in the Unicode specification.
373 * @since 1.1
374 */
375 public static final byte DASH_PUNCTUATION = 20;
376
377 /**
378 * General category "Ps" in the Unicode specification.
379 * @since 1.1
380 */
381 public static final byte START_PUNCTUATION = 21;
382
383 /**
384 * General category "Pe" in the Unicode specification.
385 * @since 1.1
386 */
387 public static final byte END_PUNCTUATION = 22;
388
389 /**
390 * General category "Pc" in the Unicode specification.
391 * @since 1.1
392 */
393 public static final byte CONNECTOR_PUNCTUATION = 23;
394
395 /**
396 * General category "Po" in the Unicode specification.
397 * @since 1.1
398 */
399 public static final byte OTHER_PUNCTUATION = 24;
400
401 /**
402 * General category "Sm" in the Unicode specification.
403 * @since 1.1
404 */
405 public static final byte MATH_SYMBOL = 25;
406
407 /**
408 * General category "Sc" in the Unicode specification.
409 * @since 1.1
410 */
411 public static final byte CURRENCY_SYMBOL = 26;
412
413 /**
414 * General category "Sk" in the Unicode specification.
415 * @since 1.1
416 */
417 public static final byte MODIFIER_SYMBOL = 27;
418
419 /**
420 * General category "So" in the Unicode specification.
421 * @since 1.1
422 */
423 public static final byte OTHER_SYMBOL = 28;
424
425 /**
426 * General category "Pi" in the Unicode specification.
427 * @since 1.4
428 */
429 public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
430
431 /**
432 * General category "Pf" in the Unicode specification.
433 * @since 1.4
434 */
435 public static final byte FINAL_QUOTE_PUNCTUATION = 30;
436
437 /**
438 * Error flag. Use int (code point) to avoid confusion with U+FFFF.
439 */
440 static final int ERROR = 0xFFFFFFFF;
441
442
443 /**
444 * Undefined bidirectional character type. Undefined {@code char}
445 * values have undefined directionality in the Unicode specification.
446 * @since 1.4
447 */
448 public static final byte DIRECTIONALITY_UNDEFINED = -1;
449
450 /**
451 * Strong bidirectional character type "L" in the Unicode specification.
452 * @since 1.4
453 */
454 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
455
456 /**
457 * Strong bidirectional character type "R" in the Unicode specification.
458 * @since 1.4
459 */
460 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
461
462 /**
463 * Strong bidirectional character type "AL" in the Unicode specification.
464 * @since 1.4
465 */
466 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
467
468 /**
469 * Weak bidirectional character type "EN" in the Unicode specification.
470 * @since 1.4
471 */
472 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
473
474 /**
475 * Weak bidirectional character type "ES" in the Unicode specification.
476 * @since 1.4
477 */
478 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
479
480 /**
481 * Weak bidirectional character type "ET" in the Unicode specification.
482 * @since 1.4
483 */
484 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
485
486 /**
487 * Weak bidirectional character type "AN" in the Unicode specification.
488 * @since 1.4
489 */
490 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
491
492 /**
493 * Weak bidirectional character type "CS" in the Unicode specification.
494 * @since 1.4
495 */
496 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
497
498 /**
499 * Weak bidirectional character type "NSM" in the Unicode specification.
500 * @since 1.4
501 */
502 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
503
504 /**
505 * Weak bidirectional character type "BN" in the Unicode specification.
506 * @since 1.4
507 */
508 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
509
510 /**
511 * Neutral bidirectional character type "B" in the Unicode specification.
512 * @since 1.4
513 */
514 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
515
516 /**
517 * Neutral bidirectional character type "S" in the Unicode specification.
518 * @since 1.4
519 */
520 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
521
522 /**
523 * Neutral bidirectional character type "WS" in the Unicode specification.
524 * @since 1.4
525 */
526 public static final byte DIRECTIONALITY_WHITESPACE = 12;
527
528 /**
529 * Neutral bidirectional character type "ON" in the Unicode specification.
530 * @since 1.4
531 */
532 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
533
534 /**
535 * Strong bidirectional character type "LRE" in the Unicode specification.
536 * @since 1.4
537 */
538 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
539
540 /**
541 * Strong bidirectional character type "LRO" in the Unicode specification.
542 * @since 1.4
543 */
544 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
545
546 /**
547 * Strong bidirectional character type "RLE" in the Unicode specification.
548 * @since 1.4
549 */
550 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
551
552 /**
553 * Strong bidirectional character type "RLO" in the Unicode specification.
554 * @since 1.4
555 */
556 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
557
558 /**
559 * Weak bidirectional character type "PDF" in the Unicode specification.
560 * @since 1.4
561 */
562 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
563
564 /**
565 * Weak bidirectional character type "LRI" in the Unicode specification.
566 * @since 9
567 */
568 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19;
569
570 /**
571 * Weak bidirectional character type "RLI" in the Unicode specification.
572 * @since 9
573 */
574 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20;
575
576 /**
577 * Weak bidirectional character type "FSI" in the Unicode specification.
578 * @since 9
579 */
580 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21;
581
582 /**
583 * Weak bidirectional character type "PDI" in the Unicode specification.
584 * @since 9
585 */
586 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22;
587
588 /**
589 * The minimum value of a
590 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
591 * Unicode high-surrogate code unit</a>
592 * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
593 * A high-surrogate is also known as a <i>leading-surrogate</i>.
594 *
595 * @since 1.5
596 */
597 public static final char MIN_HIGH_SURROGATE = '\uD800';
598
599 /**
600 * The maximum value of a
601 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
602 * Unicode high-surrogate code unit</a>
603 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
604 * A high-surrogate is also known as a <i>leading-surrogate</i>.
605 *
606 * @since 1.5
607 */
608 public static final char MAX_HIGH_SURROGATE = '\uDBFF';
609
610 /**
611 * The minimum value of a
612 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
613 * Unicode low-surrogate code unit</a>
614 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
615 * A low-surrogate is also known as a <i>trailing-surrogate</i>.
616 *
617 * @since 1.5
618 */
619 public static final char MIN_LOW_SURROGATE = '\uDC00';
620
621 /**
622 * The maximum value of a
623 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
624 * Unicode low-surrogate code unit</a>
625 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
626 * A low-surrogate is also known as a <i>trailing-surrogate</i>.
627 *
628 * @since 1.5
629 */
630 public static final char MAX_LOW_SURROGATE = '\uDFFF';
631
632 /**
633 * The minimum value of a Unicode surrogate code unit in the
634 * UTF-16 encoding, constant {@code '\u005CuD800'}.
635 *
636 * @since 1.5
637 */
638 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
639
640 /**
641 * The maximum value of a Unicode surrogate code unit in the
642 * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
643 *
644 * @since 1.5
645 */
646 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
647
648 /**
649 * The minimum value of a
650 * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
651 * Unicode supplementary code point</a>, constant {@code U+10000}.
652 *
653 * @since 1.5
654 */
655 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
656
657 /**
658 * The minimum value of a
659 * <a href="http://www.unicode.org/glossary/#code_point">
660 * Unicode code point</a>, constant {@code U+0000}.
661 *
662 * @since 1.5
663 */
664 public static final int MIN_CODE_POINT = 0x000000;
665
666 /**
667 * The maximum value of a
668 * <a href="http://www.unicode.org/glossary/#code_point">
669 * Unicode code point</a>, constant {@code U+10FFFF}.
670 *
671 * @since 1.5
672 */
673 public static final int MAX_CODE_POINT = 0X10FFFF;
674
675 /**
676 * Returns an {@link Optional} containing the nominal descriptor for this
677 * instance.
678 *
679 * @return an {@link Optional} describing the {@linkplain Character} instance
680 * @since 15
681 */
682 @Override
683 public Optional<DynamicConstantDesc<Character>> describeConstable() {
684 return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value));
685 }
686
687 /**
688 * Instances of this class represent particular subsets of the Unicode
689 * character set. The only family of subsets defined in the
690 * {@code Character} class is {@link Character.UnicodeBlock}.
691 * Other portions of the Java API may define other subsets for their
692 * own purposes.
693 *
694 * @since 1.2
695 */
696 public static class Subset {
697
698 private String name;
699
700 /**
701 * Constructs a new {@code Subset} instance.
702 *
703 * @param name The name of this subset
704 * @throws NullPointerException if name is {@code null}
705 */
706 protected Subset(String name) {
707 if (name == null) {
708 throw new NullPointerException("name");
709 }
710 this.name = name;
711 }
712
713 /**
714 * Compares two {@code Subset} objects for equality.
715 * This method returns {@code true} if and only if
716 * {@code this} and the argument refer to the same
717 * object; since this method is {@code final}, this
718 * guarantee holds for all subclasses.
719 */
720 public final boolean equals(Object obj) {
721 return (this == obj);
722 }
723
724 /**
725 * Returns the standard hash code as defined by the
726 * {@link Object#hashCode} method. This method
727 * is {@code final} in order to ensure that the
728 * {@code equals} and {@code hashCode} methods will
729 * be consistent in all subclasses.
730 */
731 public final int hashCode() {
732 return super.hashCode();
733 }
734
735 /**
736 * Returns the name of this subset.
737 */
738 public final String toString() {
739 return name;
740 }
741 }
742
743 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
744 // for the latest specification of Unicode Blocks.
745
746 /**
747 * A family of character subsets representing the character blocks in the
748 * Unicode specification. Character blocks generally define characters
749 * used for a specific script or purpose. A character is contained by
750 * at most one Unicode block.
751 *
752 * @since 1.2
753 */
754 public static final class UnicodeBlock extends Subset {
/**
 * NUM_ENTITIES should match the total number of UnicodeBlocks.
 * It should be adjusted whenever the Unicode Character Database
 * is upgraded.
 * <p>
 * The value is used only as the expected number of mappings when
 * presizing {@code map}.
 * NOTE(review): the constructors register every alias as a separate
 * key in addition to the identifier name, so this presumably needs to
 * cover the number of map entries rather than the number of distinct
 * blocks -- confirm when updating.
 */
private static final int NUM_ENTITIES = 804;
// Lookup table from block identifier names and alias names to the
// UnicodeBlock they denote. It is filled in by the constructors as
// each block constant is created.
private static Map<String, UnicodeBlock> map = HashMap.newHashMap(NUM_ENTITIES);
762
/**
 * Creates a UnicodeBlock known only by its identifier name.
 * The given name must be the same as the block identifier; it becomes
 * the {@code Subset} name and is registered as a key in the
 * name-to-block lookup table.
 *
 * @param idName the block identifier; a {@code null} value is rejected
 *               by the {@code Subset} constructor
 */
private UnicodeBlock(String idName) {
    super(idName);
    // Make the new block discoverable under its identifier.
    UnicodeBlock.map.put(idName, this);
}
771
/**
 * Creates a UnicodeBlock known by its identifier name and by one
 * additional alias name. Both names are registered as keys in the
 * name-to-block lookup table and map to the new block.
 *
 * @param idName the block identifier
 * @param alias  an alternative name under which the block is also registered
 */
private UnicodeBlock(String idName, String alias) {
    this(idName);
    // The identifier was registered by the delegate; add the alias too.
    UnicodeBlock.map.put(alias, this);
}
780
/**
 * Creates a UnicodeBlock known by its identifier name and by any
 * number of alias names. The identifier and every alias are registered
 * as keys in the name-to-block lookup table and map to the new block.
 *
 * @param idName  the block identifier
 * @param aliases alternative names under which the block is also registered
 */
private UnicodeBlock(String idName, String... aliases) {
    this(idName);
    // Register each alias in the order given.
    for (int i = 0; i < aliases.length; i++) {
        map.put(aliases[i], this);
    }
}
790
791 /**
792 * Constant for the "Basic Latin" Unicode character block.
793 * @since 1.2
794 */
795 public static final UnicodeBlock BASIC_LATIN =
796 new UnicodeBlock("BASIC_LATIN",
797 "BASIC LATIN",
798 "BASICLATIN");
799
800 /**
801 * Constant for the "Latin-1 Supplement" Unicode character block.
802 * @since 1.2
803 */
804 public static final UnicodeBlock LATIN_1_SUPPLEMENT =
805 new UnicodeBlock("LATIN_1_SUPPLEMENT",
806 "LATIN-1 SUPPLEMENT",
807 "LATIN-1SUPPLEMENT");
808
809 /**
810 * Constant for the "Latin Extended-A" Unicode character block.
811 * @since 1.2
812 */
813 public static final UnicodeBlock LATIN_EXTENDED_A =
814 new UnicodeBlock("LATIN_EXTENDED_A",
815 "LATIN EXTENDED-A",
816 "LATINEXTENDED-A");
817
818 /**
819 * Constant for the "Latin Extended-B" Unicode character block.
820 * @since 1.2
821 */
822 public static final UnicodeBlock LATIN_EXTENDED_B =
823 new UnicodeBlock("LATIN_EXTENDED_B",
824 "LATIN EXTENDED-B",
825 "LATINEXTENDED-B");
826
827 /**
828 * Constant for the "IPA Extensions" Unicode character block.
829 * @since 1.2
830 */
831 public static final UnicodeBlock IPA_EXTENSIONS =
832 new UnicodeBlock("IPA_EXTENSIONS",
833 "IPA EXTENSIONS",
834 "IPAEXTENSIONS");
835
836 /**
837 * Constant for the "Spacing Modifier Letters" Unicode character block.
838 * @since 1.2
839 */
840 public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
841 new UnicodeBlock("SPACING_MODIFIER_LETTERS",
842 "SPACING MODIFIER LETTERS",
843 "SPACINGMODIFIERLETTERS");
844
845 /**
846 * Constant for the "Combining Diacritical Marks" Unicode character block.
847 * @since 1.2
848 */
849 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
850 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
851 "COMBINING DIACRITICAL MARKS",
852 "COMBININGDIACRITICALMARKS");
853
854 /**
855 * Constant for the "Greek and Coptic" Unicode character block.
856 * <p>
857 * This block was previously known as the "Greek" block.
858 *
859 * @since 1.2
860 */
861 public static final UnicodeBlock GREEK =
862 new UnicodeBlock("GREEK",
863 "GREEK AND COPTIC",
864 "GREEKANDCOPTIC");
865
866 /**
867 * Constant for the "Cyrillic" Unicode character block.
868 * @since 1.2
869 */
870 public static final UnicodeBlock CYRILLIC =
871 new UnicodeBlock("CYRILLIC");
872
873 /**
874 * Constant for the "Armenian" Unicode character block.
875 * @since 1.2
876 */
877 public static final UnicodeBlock ARMENIAN =
878 new UnicodeBlock("ARMENIAN");
879
880 /**
881 * Constant for the "Hebrew" Unicode character block.
882 * @since 1.2
883 */
884 public static final UnicodeBlock HEBREW =
885 new UnicodeBlock("HEBREW");
886
887 /**
888 * Constant for the "Arabic" Unicode character block.
889 * @since 1.2
890 */
891 public static final UnicodeBlock ARABIC =
892 new UnicodeBlock("ARABIC");
893
894 /**
895 * Constant for the "Devanagari" Unicode character block.
896 * @since 1.2
897 */
898 public static final UnicodeBlock DEVANAGARI =
899 new UnicodeBlock("DEVANAGARI");
900
901 /**
902 * Constant for the "Bengali" Unicode character block.
903 * @since 1.2
904 */
905 public static final UnicodeBlock BENGALI =
906 new UnicodeBlock("BENGALI");
907
908 /**
909 * Constant for the "Gurmukhi" Unicode character block.
910 * @since 1.2
911 */
912 public static final UnicodeBlock GURMUKHI =
913 new UnicodeBlock("GURMUKHI");
914
915 /**
916 * Constant for the "Gujarati" Unicode character block.
917 * @since 1.2
918 */
919 public static final UnicodeBlock GUJARATI =
920 new UnicodeBlock("GUJARATI");
921
922 /**
923 * Constant for the "Oriya" Unicode character block.
924 * @since 1.2
925 */
926 public static final UnicodeBlock ORIYA =
927 new UnicodeBlock("ORIYA");
928
929 /**
930 * Constant for the "Tamil" Unicode character block.
931 * @since 1.2
932 */
933 public static final UnicodeBlock TAMIL =
934 new UnicodeBlock("TAMIL");
935
936 /**
937 * Constant for the "Telugu" Unicode character block.
938 * @since 1.2
939 */
940 public static final UnicodeBlock TELUGU =
941 new UnicodeBlock("TELUGU");
942
943 /**
944 * Constant for the "Kannada" Unicode character block.
945 * @since 1.2
946 */
947 public static final UnicodeBlock KANNADA =
948 new UnicodeBlock("KANNADA");
949
950 /**
951 * Constant for the "Malayalam" Unicode character block.
952 * @since 1.2
953 */
954 public static final UnicodeBlock MALAYALAM =
955 new UnicodeBlock("MALAYALAM");
956
957 /**
958 * Constant for the "Thai" Unicode character block.
959 * @since 1.2
960 */
961 public static final UnicodeBlock THAI =
962 new UnicodeBlock("THAI");
963
964 /**
965 * Constant for the "Lao" Unicode character block.
966 * @since 1.2
967 */
968 public static final UnicodeBlock LAO =
969 new UnicodeBlock("LAO");
970
971 /**
972 * Constant for the "Tibetan" Unicode character block.
973 * @since 1.2
974 */
975 public static final UnicodeBlock TIBETAN =
976 new UnicodeBlock("TIBETAN");
977
978 /**
979 * Constant for the "Georgian" Unicode character block.
980 * @since 1.2
981 */
982 public static final UnicodeBlock GEORGIAN =
983 new UnicodeBlock("GEORGIAN");
984
985 /**
986 * Constant for the "Hangul Jamo" Unicode character block.
987 * @since 1.2
988 */
989 public static final UnicodeBlock HANGUL_JAMO =
990 new UnicodeBlock("HANGUL_JAMO",
991 "HANGUL JAMO",
992 "HANGULJAMO");
993
994 /**
995 * Constant for the "Latin Extended Additional" Unicode character block.
996 * @since 1.2
997 */
998 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
999 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
1000 "LATIN EXTENDED ADDITIONAL",
1001 "LATINEXTENDEDADDITIONAL");
1002
1003 /**
1004 * Constant for the "Greek Extended" Unicode character block.
1005 * @since 1.2
1006 */
1007 public static final UnicodeBlock GREEK_EXTENDED =
1008 new UnicodeBlock("GREEK_EXTENDED",
1009 "GREEK EXTENDED",
1010 "GREEKEXTENDED");
1011
1012 /**
1013 * Constant for the "General Punctuation" Unicode character block.
1014 * @since 1.2
1015 */
1016 public static final UnicodeBlock GENERAL_PUNCTUATION =
1017 new UnicodeBlock("GENERAL_PUNCTUATION",
1018 "GENERAL PUNCTUATION",
1019 "GENERALPUNCTUATION");
1020
1021 /**
1022 * Constant for the "Superscripts and Subscripts" Unicode character
1023 * block.
1024 * @since 1.2
1025 */
1026 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
1027 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
1028 "SUPERSCRIPTS AND SUBSCRIPTS",
1029 "SUPERSCRIPTSANDSUBSCRIPTS");
1030
1031 /**
1032 * Constant for the "Currency Symbols" Unicode character block.
1033 * @since 1.2
1034 */
1035 public static final UnicodeBlock CURRENCY_SYMBOLS =
1036 new UnicodeBlock("CURRENCY_SYMBOLS",
1037 "CURRENCY SYMBOLS",
1038 "CURRENCYSYMBOLS");
1039
1040 /**
1041 * Constant for the "Combining Diacritical Marks for Symbols" Unicode
1042 * character block.
1043 * <p>
1044 * This block was previously known as "Combining Marks for Symbols".
1045 * @since 1.2
1046 */
1047 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
1048 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
1049 "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
1050 "COMBININGDIACRITICALMARKSFORSYMBOLS",
1051 "COMBINING MARKS FOR SYMBOLS",
1052 "COMBININGMARKSFORSYMBOLS");
1053
1054 /**
1055 * Constant for the "Letterlike Symbols" Unicode character block.
1056 * @since 1.2
1057 */
1058 public static final UnicodeBlock LETTERLIKE_SYMBOLS =
1059 new UnicodeBlock("LETTERLIKE_SYMBOLS",
1060 "LETTERLIKE SYMBOLS",
1061 "LETTERLIKESYMBOLS");
1062
1063 /**
1064 * Constant for the "Number Forms" Unicode character block.
1065 * @since 1.2
1066 */
1067 public static final UnicodeBlock NUMBER_FORMS =
1068 new UnicodeBlock("NUMBER_FORMS",
1069 "NUMBER FORMS",
1070 "NUMBERFORMS");
1071
1072 /**
1073 * Constant for the "Arrows" Unicode character block.
1074 * @since 1.2
1075 */
1076 public static final UnicodeBlock ARROWS =
1077 new UnicodeBlock("ARROWS");
1078
1079 /**
1080 * Constant for the "Mathematical Operators" Unicode character block.
1081 * @since 1.2
1082 */
1083 public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1084 new UnicodeBlock("MATHEMATICAL_OPERATORS",
1085 "MATHEMATICAL OPERATORS",
1086 "MATHEMATICALOPERATORS");
1087
1088 /**
1089 * Constant for the "Miscellaneous Technical" Unicode character block.
1090 * @since 1.2
1091 */
1092 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1093 new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1094 "MISCELLANEOUS TECHNICAL",
1095 "MISCELLANEOUSTECHNICAL");
1096
1097 /**
1098 * Constant for the "Control Pictures" Unicode character block.
1099 * @since 1.2
1100 */
1101 public static final UnicodeBlock CONTROL_PICTURES =
1102 new UnicodeBlock("CONTROL_PICTURES",
1103 "CONTROL PICTURES",
1104 "CONTROLPICTURES");
1105
1106 /**
1107 * Constant for the "Optical Character Recognition" Unicode character block.
1108 * @since 1.2
1109 */
1110 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1111 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1112 "OPTICAL CHARACTER RECOGNITION",
1113 "OPTICALCHARACTERRECOGNITION");
1114
1115 /**
1116 * Constant for the "Enclosed Alphanumerics" Unicode character block.
1117 * @since 1.2
1118 */
1119 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1120 new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1121 "ENCLOSED ALPHANUMERICS",
1122 "ENCLOSEDALPHANUMERICS");
1123
1124 /**
1125 * Constant for the "Box Drawing" Unicode character block.
1126 * @since 1.2
1127 */
1128 public static final UnicodeBlock BOX_DRAWING =
1129 new UnicodeBlock("BOX_DRAWING",
1130 "BOX DRAWING",
1131 "BOXDRAWING");
1132
1133 /**
1134 * Constant for the "Block Elements" Unicode character block.
1135 * @since 1.2
1136 */
1137 public static final UnicodeBlock BLOCK_ELEMENTS =
1138 new UnicodeBlock("BLOCK_ELEMENTS",
1139 "BLOCK ELEMENTS",
1140 "BLOCKELEMENTS");
1141
1142 /**
1143 * Constant for the "Geometric Shapes" Unicode character block.
1144 * @since 1.2
1145 */
1146 public static final UnicodeBlock GEOMETRIC_SHAPES =
1147 new UnicodeBlock("GEOMETRIC_SHAPES",
1148 "GEOMETRIC SHAPES",
1149 "GEOMETRICSHAPES");
1150
1151 /**
1152 * Constant for the "Miscellaneous Symbols" Unicode character block.
1153 * @since 1.2
1154 */
1155 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1156 new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1157 "MISCELLANEOUS SYMBOLS",
1158 "MISCELLANEOUSSYMBOLS");
1159
1160 /**
1161 * Constant for the "Dingbats" Unicode character block.
1162 * @since 1.2
1163 */
1164 public static final UnicodeBlock DINGBATS =
1165 new UnicodeBlock("DINGBATS");
1166
1167 /**
1168 * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1169 * @since 1.2
1170 */
1171 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1172 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1173 "CJK SYMBOLS AND PUNCTUATION",
1174 "CJKSYMBOLSANDPUNCTUATION");
1175
1176 /**
1177 * Constant for the "Hiragana" Unicode character block.
1178 * @since 1.2
1179 */
1180 public static final UnicodeBlock HIRAGANA =
1181 new UnicodeBlock("HIRAGANA");
1182
1183 /**
1184 * Constant for the "Katakana" Unicode character block.
1185 * @since 1.2
1186 */
1187 public static final UnicodeBlock KATAKANA =
1188 new UnicodeBlock("KATAKANA");
1189
1190 /**
1191 * Constant for the "Bopomofo" Unicode character block.
1192 * @since 1.2
1193 */
1194 public static final UnicodeBlock BOPOMOFO =
1195 new UnicodeBlock("BOPOMOFO");
1196
1197 /**
1198 * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1199 * @since 1.2
1200 */
1201 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1202 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1203 "HANGUL COMPATIBILITY JAMO",
1204 "HANGULCOMPATIBILITYJAMO");
1205
1206 /**
1207 * Constant for the "Kanbun" Unicode character block.
1208 * @since 1.2
1209 */
1210 public static final UnicodeBlock KANBUN =
1211 new UnicodeBlock("KANBUN");
1212
1213 /**
1214 * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1215 * @since 1.2
1216 */
1217 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1218 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1219 "ENCLOSED CJK LETTERS AND MONTHS",
1220 "ENCLOSEDCJKLETTERSANDMONTHS");
1221
1222 /**
1223 * Constant for the "CJK Compatibility" Unicode character block.
1224 * @since 1.2
1225 */
1226 public static final UnicodeBlock CJK_COMPATIBILITY =
1227 new UnicodeBlock("CJK_COMPATIBILITY",
1228 "CJK COMPATIBILITY",
1229 "CJKCOMPATIBILITY");
1230
1231 /**
1232 * Constant for the "CJK Unified Ideographs" Unicode character block.
1233 * @since 1.2
1234 */
1235 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1236 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1237 "CJK UNIFIED IDEOGRAPHS",
1238 "CJKUNIFIEDIDEOGRAPHS");
1239
1240 /**
1241 * Constant for the "Hangul Syllables" Unicode character block.
1242 * @since 1.2
1243 */
1244 public static final UnicodeBlock HANGUL_SYLLABLES =
1245 new UnicodeBlock("HANGUL_SYLLABLES",
1246 "HANGUL SYLLABLES",
1247 "HANGULSYLLABLES");
1248
1249 /**
1250 * Constant for the "Private Use Area" Unicode character block.
1251 * @since 1.2
1252 */
1253 public static final UnicodeBlock PRIVATE_USE_AREA =
1254 new UnicodeBlock("PRIVATE_USE_AREA",
1255 "PRIVATE USE AREA",
1256 "PRIVATEUSEAREA");
1257
1258 /**
1259 * Constant for the "CJK Compatibility Ideographs" Unicode character
1260 * block.
1261 * @since 1.2
1262 */
1263 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1264 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1265 "CJK COMPATIBILITY IDEOGRAPHS",
1266 "CJKCOMPATIBILITYIDEOGRAPHS");
1267
1268 /**
1269 * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1270 * @since 1.2
1271 */
1272 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1273 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1274 "ALPHABETIC PRESENTATION FORMS",
1275 "ALPHABETICPRESENTATIONFORMS");
1276
1277 /**
1278 * Constant for the "Arabic Presentation Forms-A" Unicode character
1279 * block.
1280 * @since 1.2
1281 */
1282 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1283 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1284 "ARABIC PRESENTATION FORMS-A",
1285 "ARABICPRESENTATIONFORMS-A");
1286
1287 /**
1288 * Constant for the "Combining Half Marks" Unicode character block.
1289 * @since 1.2
1290 */
1291 public static final UnicodeBlock COMBINING_HALF_MARKS =
1292 new UnicodeBlock("COMBINING_HALF_MARKS",
1293 "COMBINING HALF MARKS",
1294 "COMBININGHALFMARKS");
1295
1296 /**
1297 * Constant for the "CJK Compatibility Forms" Unicode character block.
1298 * @since 1.2
1299 */
1300 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1301 new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1302 "CJK COMPATIBILITY FORMS",
1303 "CJKCOMPATIBILITYFORMS");
1304
1305 /**
1306 * Constant for the "Small Form Variants" Unicode character block.
1307 * @since 1.2
1308 */
1309 public static final UnicodeBlock SMALL_FORM_VARIANTS =
1310 new UnicodeBlock("SMALL_FORM_VARIANTS",
1311 "SMALL FORM VARIANTS",
1312 "SMALLFORMVARIANTS");
1313
1314 /**
1315 * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1316 * @since 1.2
1317 */
1318 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1319 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1320 "ARABIC PRESENTATION FORMS-B",
1321 "ARABICPRESENTATIONFORMS-B");
1322
1323 /**
1324 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1325 * block.
1326 * @since 1.2
1327 */
1328 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1329 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1330 "HALFWIDTH AND FULLWIDTH FORMS",
1331 "HALFWIDTHANDFULLWIDTHFORMS");
1332
1333 /**
1334 * Constant for the "Specials" Unicode character block.
1335 * @since 1.2
1336 */
1337 public static final UnicodeBlock SPECIALS =
1338 new UnicodeBlock("SPECIALS");
1339
1340 /**
1341 * @deprecated
1342 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES},
1343 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}.
1344 * These constants match the block definitions of the Unicode Standard.
1345 * The {@link #of(char)} and {@link #of(int)} methods return the
1346 * standard constants.
1347 */
1348 @Deprecated(since="1.5")
1349 public static final UnicodeBlock SURROGATES_AREA =
1350 new UnicodeBlock("SURROGATES_AREA");
1351
1352 /**
1353 * Constant for the "Syriac" Unicode character block.
1354 * @since 1.4
1355 */
1356 public static final UnicodeBlock SYRIAC =
1357 new UnicodeBlock("SYRIAC");
1358
1359 /**
1360 * Constant for the "Thaana" Unicode character block.
1361 * @since 1.4
1362 */
1363 public static final UnicodeBlock THAANA =
1364 new UnicodeBlock("THAANA");
1365
1366 /**
1367 * Constant for the "Sinhala" Unicode character block.
1368 * @since 1.4
1369 */
1370 public static final UnicodeBlock SINHALA =
1371 new UnicodeBlock("SINHALA");
1372
1373 /**
1374 * Constant for the "Myanmar" Unicode character block.
1375 * @since 1.4
1376 */
1377 public static final UnicodeBlock MYANMAR =
1378 new UnicodeBlock("MYANMAR");
1379
1380 /**
1381 * Constant for the "Ethiopic" Unicode character block.
1382 * @since 1.4
1383 */
1384 public static final UnicodeBlock ETHIOPIC =
1385 new UnicodeBlock("ETHIOPIC");
1386
1387 /**
1388 * Constant for the "Cherokee" Unicode character block.
1389 * @since 1.4
1390 */
1391 public static final UnicodeBlock CHEROKEE =
1392 new UnicodeBlock("CHEROKEE");
1393
1394 /**
1395 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1396 * @since 1.4
1397 */
1398 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1399 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1400 "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1401 "UNIFIEDCANADIANABORIGINALSYLLABICS");
1402
1403 /**
1404 * Constant for the "Ogham" Unicode character block.
1405 * @since 1.4
1406 */
1407 public static final UnicodeBlock OGHAM =
1408 new UnicodeBlock("OGHAM");
1409
1410 /**
1411 * Constant for the "Runic" Unicode character block.
1412 * @since 1.4
1413 */
1414 public static final UnicodeBlock RUNIC =
1415 new UnicodeBlock("RUNIC");
1416
1417 /**
1418 * Constant for the "Khmer" Unicode character block.
1419 * @since 1.4
1420 */
1421 public static final UnicodeBlock KHMER =
1422 new UnicodeBlock("KHMER");
1423
1424 /**
1425 * Constant for the "Mongolian" Unicode character block.
1426 * @since 1.4
1427 */
1428 public static final UnicodeBlock MONGOLIAN =
1429 new UnicodeBlock("MONGOLIAN");
1430
1431 /**
1432 * Constant for the "Braille Patterns" Unicode character block.
1433 * @since 1.4
1434 */
1435 public static final UnicodeBlock BRAILLE_PATTERNS =
1436 new UnicodeBlock("BRAILLE_PATTERNS",
1437 "BRAILLE PATTERNS",
1438 "BRAILLEPATTERNS");
1439
1440 /**
1441 * Constant for the "CJK Radicals Supplement" Unicode character block.
1442 * @since 1.4
1443 */
1444 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1445 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1446 "CJK RADICALS SUPPLEMENT",
1447 "CJKRADICALSSUPPLEMENT");
1448
1449 /**
1450 * Constant for the "Kangxi Radicals" Unicode character block.
1451 * @since 1.4
1452 */
1453 public static final UnicodeBlock KANGXI_RADICALS =
1454 new UnicodeBlock("KANGXI_RADICALS",
1455 "KANGXI RADICALS",
1456 "KANGXIRADICALS");
1457
1458 /**
1459 * Constant for the "Ideographic Description Characters" Unicode character block.
1460 * @since 1.4
1461 */
1462 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1463 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1464 "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1465 "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1466
1467 /**
1468 * Constant for the "Bopomofo Extended" Unicode character block.
1469 * @since 1.4
1470 */
1471 public static final UnicodeBlock BOPOMOFO_EXTENDED =
1472 new UnicodeBlock("BOPOMOFO_EXTENDED",
1473 "BOPOMOFO EXTENDED",
1474 "BOPOMOFOEXTENDED");
1475
1476 /**
1477 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1478 * @since 1.4
1479 */
1480 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1481 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1482 "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1483 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1484
1485 /**
1486 * Constant for the "Yi Syllables" Unicode character block.
1487 * @since 1.4
1488 */
1489 public static final UnicodeBlock YI_SYLLABLES =
1490 new UnicodeBlock("YI_SYLLABLES",
1491 "YI SYLLABLES",
1492 "YISYLLABLES");
1493
1494 /**
1495 * Constant for the "Yi Radicals" Unicode character block.
1496 * @since 1.4
1497 */
1498 public static final UnicodeBlock YI_RADICALS =
1499 new UnicodeBlock("YI_RADICALS",
1500 "YI RADICALS",
1501 "YIRADICALS");
1502
1503 /**
1504 * Constant for the "Cyrillic Supplement" Unicode character block.
1505 * This block was previously known as the "Cyrillic Supplementary" block.
1506 * @since 1.5
1507 */
1508 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1509 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1510 "CYRILLIC SUPPLEMENTARY",
1511 "CYRILLICSUPPLEMENTARY",
1512 "CYRILLIC SUPPLEMENT",
1513 "CYRILLICSUPPLEMENT");
1514
1515 /**
1516 * Constant for the "Tagalog" Unicode character block.
1517 * @since 1.5
1518 */
1519 public static final UnicodeBlock TAGALOG =
1520 new UnicodeBlock("TAGALOG");
1521
1522 /**
1523 * Constant for the "Hanunoo" Unicode character block.
1524 * @since 1.5
1525 */
1526 public static final UnicodeBlock HANUNOO =
1527 new UnicodeBlock("HANUNOO");
1528
1529 /**
1530 * Constant for the "Buhid" Unicode character block.
1531 * @since 1.5
1532 */
1533 public static final UnicodeBlock BUHID =
1534 new UnicodeBlock("BUHID");
1535
1536 /**
1537 * Constant for the "Tagbanwa" Unicode character block.
1538 * @since 1.5
1539 */
1540 public static final UnicodeBlock TAGBANWA =
1541 new UnicodeBlock("TAGBANWA");
1542
1543 /**
1544 * Constant for the "Limbu" Unicode character block.
1545 * @since 1.5
1546 */
1547 public static final UnicodeBlock LIMBU =
1548 new UnicodeBlock("LIMBU");
1549
1550 /**
1551 * Constant for the "Tai Le" Unicode character block.
1552 * @since 1.5
1553 */
1554 public static final UnicodeBlock TAI_LE =
1555 new UnicodeBlock("TAI_LE",
1556 "TAI LE",
1557 "TAILE");
1558
1559 /**
1560 * Constant for the "Khmer Symbols" Unicode character block.
1561 * @since 1.5
1562 */
1563 public static final UnicodeBlock KHMER_SYMBOLS =
1564 new UnicodeBlock("KHMER_SYMBOLS",
1565 "KHMER SYMBOLS",
1566 "KHMERSYMBOLS");
1567
1568 /**
1569 * Constant for the "Phonetic Extensions" Unicode character block.
1570 * @since 1.5
1571 */
1572 public static final UnicodeBlock PHONETIC_EXTENSIONS =
1573 new UnicodeBlock("PHONETIC_EXTENSIONS",
1574 "PHONETIC EXTENSIONS",
1575 "PHONETICEXTENSIONS");
1576
1577 /**
1578 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1579 * @since 1.5
1580 */
1581 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1582 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1583 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1584 "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1585
1586 /**
1587 * Constant for the "Supplemental Arrows-A" Unicode character block.
1588 * @since 1.5
1589 */
1590 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1591 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1592 "SUPPLEMENTAL ARROWS-A",
1593 "SUPPLEMENTALARROWS-A");
1594
1595 /**
1596 * Constant for the "Supplemental Arrows-B" Unicode character block.
1597 * @since 1.5
1598 */
1599 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1600 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1601 "SUPPLEMENTAL ARROWS-B",
1602 "SUPPLEMENTALARROWS-B");
1603
1604 /**
1605 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1606 * character block.
1607 * @since 1.5
1608 */
1609 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1610 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1611 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1612 "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1613
1614 /**
1615 * Constant for the "Supplemental Mathematical Operators" Unicode
1616 * character block.
1617 * @since 1.5
1618 */
1619 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1620 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1621 "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1622 "SUPPLEMENTALMATHEMATICALOPERATORS");
1623
1624 /**
1625 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1626 * block.
1627 * @since 1.5
1628 */
1629 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1630 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1631 "MISCELLANEOUS SYMBOLS AND ARROWS",
1632 "MISCELLANEOUSSYMBOLSANDARROWS");
1633
1634 /**
1635 * Constant for the "Katakana Phonetic Extensions" Unicode character
1636 * block.
1637 * @since 1.5
1638 */
1639 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1640 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1641 "KATAKANA PHONETIC EXTENSIONS",
1642 "KATAKANAPHONETICEXTENSIONS");
1643
1644 /**
1645 * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1646 * @since 1.5
1647 */
1648 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1649 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1650 "YIJING HEXAGRAM SYMBOLS",
1651 "YIJINGHEXAGRAMSYMBOLS");
1652
1653 /**
1654 * Constant for the "Variation Selectors" Unicode character block.
1655 * @since 1.5
1656 */
1657 public static final UnicodeBlock VARIATION_SELECTORS =
1658 new UnicodeBlock("VARIATION_SELECTORS",
1659 "VARIATION SELECTORS",
1660 "VARIATIONSELECTORS");
1661
1662 /**
1663 * Constant for the "Linear B Syllabary" Unicode character block.
1664 * @since 1.5
1665 */
1666 public static final UnicodeBlock LINEAR_B_SYLLABARY =
1667 new UnicodeBlock("LINEAR_B_SYLLABARY",
1668 "LINEAR B SYLLABARY",
1669 "LINEARBSYLLABARY");
1670
1671 /**
1672 * Constant for the "Linear B Ideograms" Unicode character block.
1673 * @since 1.5
1674 */
1675 public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1676 new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1677 "LINEAR B IDEOGRAMS",
1678 "LINEARBIDEOGRAMS");
1679
1680 /**
1681 * Constant for the "Aegean Numbers" Unicode character block.
1682 * @since 1.5
1683 */
1684 public static final UnicodeBlock AEGEAN_NUMBERS =
1685 new UnicodeBlock("AEGEAN_NUMBERS",
1686 "AEGEAN NUMBERS",
1687 "AEGEANNUMBERS");
1688
1689 /**
1690 * Constant for the "Old Italic" Unicode character block.
1691 * @since 1.5
1692 */
1693 public static final UnicodeBlock OLD_ITALIC =
1694 new UnicodeBlock("OLD_ITALIC",
1695 "OLD ITALIC",
1696 "OLDITALIC");
1697
1698 /**
1699 * Constant for the "Gothic" Unicode character block.
1700 * @since 1.5
1701 */
1702 public static final UnicodeBlock GOTHIC =
1703 new UnicodeBlock("GOTHIC");
1704
1705 /**
1706 * Constant for the "Ugaritic" Unicode character block.
1707 * @since 1.5
1708 */
1709 public static final UnicodeBlock UGARITIC =
1710 new UnicodeBlock("UGARITIC");
1711
1712 /**
1713 * Constant for the "Deseret" Unicode character block.
1714 * @since 1.5
1715 */
1716 public static final UnicodeBlock DESERET =
1717 new UnicodeBlock("DESERET");
1718
1719 /**
1720 * Constant for the "Shavian" Unicode character block.
1721 * @since 1.5
1722 */
1723 public static final UnicodeBlock SHAVIAN =
1724 new UnicodeBlock("SHAVIAN");
1725
1726 /**
1727 * Constant for the "Osmanya" Unicode character block.
1728 * @since 1.5
1729 */
1730 public static final UnicodeBlock OSMANYA =
1731 new UnicodeBlock("OSMANYA");
1732
1733 /**
1734 * Constant for the "Cypriot Syllabary" Unicode character block.
1735 * @since 1.5
1736 */
1737 public static final UnicodeBlock CYPRIOT_SYLLABARY =
1738 new UnicodeBlock("CYPRIOT_SYLLABARY",
1739 "CYPRIOT SYLLABARY",
1740 "CYPRIOTSYLLABARY");
1741
1742 /**
1743 * Constant for the "Byzantine Musical Symbols" Unicode character block.
1744 * @since 1.5
1745 */
1746 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1747 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1748 "BYZANTINE MUSICAL SYMBOLS",
1749 "BYZANTINEMUSICALSYMBOLS");
1750
1751 /**
1752 * Constant for the "Musical Symbols" Unicode character block.
1753 * @since 1.5
1754 */
1755 public static final UnicodeBlock MUSICAL_SYMBOLS =
1756 new UnicodeBlock("MUSICAL_SYMBOLS",
1757 "MUSICAL SYMBOLS",
1758 "MUSICALSYMBOLS");
1759
1760 /**
1761 * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1762 * @since 1.5
1763 */
1764 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1765 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1766 "TAI XUAN JING SYMBOLS",
1767 "TAIXUANJINGSYMBOLS");
1768
1769 /**
1770 * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1771 * character block.
1772 * @since 1.5
1773 */
1774 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1775 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1776 "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1777 "MATHEMATICALALPHANUMERICSYMBOLS");
1778
1779 /**
1780 * Constant for the "CJK Unified Ideographs Extension B" Unicode
1781 * character block.
1782 * @since 1.5
1783 */
1784 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1785 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1786 "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1787 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1788
1789 /**
1790 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1791 * @since 1.5
1792 */
1793 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1794 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1795 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1796 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1797
1798 /**
1799 * Constant for the "Tags" Unicode character block.
1800 * @since 1.5
1801 */
1802 public static final UnicodeBlock TAGS =
1803 new UnicodeBlock("TAGS");
1804
1805 /**
1806 * Constant for the "Variation Selectors Supplement" Unicode character
1807 * block.
1808 * @since 1.5
1809 */
1810 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1811 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1812 "VARIATION SELECTORS SUPPLEMENT",
1813 "VARIATIONSELECTORSSUPPLEMENT");
1814
1815 /**
1816 * Constant for the "Supplementary Private Use Area-A" Unicode character
1817 * block.
1818 * @since 1.5
1819 */
1820 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1821 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1822 "SUPPLEMENTARY PRIVATE USE AREA-A",
1823 "SUPPLEMENTARYPRIVATEUSEAREA-A");
1824
1825 /**
1826 * Constant for the "Supplementary Private Use Area-B" Unicode character
1827 * block.
1828 * @since 1.5
1829 */
1830 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1831 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1832 "SUPPLEMENTARY PRIVATE USE AREA-B",
1833 "SUPPLEMENTARYPRIVATEUSEAREA-B");
1834
1835 /**
1836 * Constant for the "High Surrogates" Unicode character block.
1837 * This block represents codepoint values in the high surrogate
1838 * range: U+D800 through U+DB7F
1839 *
1840 * @since 1.5
1841 */
1842 public static final UnicodeBlock HIGH_SURROGATES =
1843 new UnicodeBlock("HIGH_SURROGATES",
1844 "HIGH SURROGATES",
1845 "HIGHSURROGATES");
1846
1847 /**
1848 * Constant for the "High Private Use Surrogates" Unicode character
1849 * block.
1850 * This block represents codepoint values in the private use high
1851 * surrogate range: U+DB80 through U+DBFF
1852 *
1853 * @since 1.5
1854 */
1855 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1856 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1857 "HIGH PRIVATE USE SURROGATES",
1858 "HIGHPRIVATEUSESURROGATES");
1859
1860 /**
1861 * Constant for the "Low Surrogates" Unicode character block.
1862 * This block represents codepoint values in the low surrogate
1863 * range: U+DC00 through U+DFFF
1864 *
1865 * @since 1.5
1866 */
1867 public static final UnicodeBlock LOW_SURROGATES =
1868 new UnicodeBlock("LOW_SURROGATES",
1869 "LOW SURROGATES",
1870 "LOWSURROGATES");
1871
1872 /**
1873 * Constant for the "Arabic Supplement" Unicode character block.
1874 * @since 1.7
1875 */
1876 public static final UnicodeBlock ARABIC_SUPPLEMENT =
1877 new UnicodeBlock("ARABIC_SUPPLEMENT",
1878 "ARABIC SUPPLEMENT",
1879 "ARABICSUPPLEMENT");
1880
1881 /**
1882 * Constant for the "NKo" Unicode character block.
1883 * @since 1.7
1884 */
1885 public static final UnicodeBlock NKO =
1886 new UnicodeBlock("NKO");
1887
1888 /**
1889 * Constant for the "Samaritan" Unicode character block.
1890 * @since 1.7
1891 */
1892 public static final UnicodeBlock SAMARITAN =
1893 new UnicodeBlock("SAMARITAN");
1894
1895 /**
1896 * Constant for the "Mandaic" Unicode character block.
1897 * @since 1.7
1898 */
1899 public static final UnicodeBlock MANDAIC =
1900 new UnicodeBlock("MANDAIC");
1901
1902 /**
1903 * Constant for the "Ethiopic Supplement" Unicode character block.
1904 * @since 1.7
1905 */
1906 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1907 new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1908 "ETHIOPIC SUPPLEMENT",
1909 "ETHIOPICSUPPLEMENT");
1910
1911 /**
1912 * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1913 * Unicode character block.
1914 * @since 1.7
1915 */
1916 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1917 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1918 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1919 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1920
1921 /**
1922 * Constant for the "New Tai Lue" Unicode character block.
1923 * @since 1.7
1924 */
1925 public static final UnicodeBlock NEW_TAI_LUE =
1926 new UnicodeBlock("NEW_TAI_LUE",
1927 "NEW TAI LUE",
1928 "NEWTAILUE");
1929
1930 /**
1931 * Constant for the "Buginese" Unicode character block.
1932 * @since 1.7
1933 */
1934 public static final UnicodeBlock BUGINESE =
1935 new UnicodeBlock("BUGINESE");
1936
1937 /**
1938 * Constant for the "Tai Tham" Unicode character block.
1939 * @since 1.7
1940 */
1941 public static final UnicodeBlock TAI_THAM =
1942 new UnicodeBlock("TAI_THAM",
1943 "TAI THAM",
1944 "TAITHAM");
1945
1946 /**
1947 * Constant for the "Balinese" Unicode character block.
1948 * @since 1.7
1949 */
1950 public static final UnicodeBlock BALINESE =
1951 new UnicodeBlock("BALINESE");
1952
1953 /**
1954 * Constant for the "Sundanese" Unicode character block.
1955 * @since 1.7
1956 */
1957 public static final UnicodeBlock SUNDANESE =
1958 new UnicodeBlock("SUNDANESE");
1959
1960 /**
1961 * Constant for the "Batak" Unicode character block.
1962 * @since 1.7
1963 */
1964 public static final UnicodeBlock BATAK =
1965 new UnicodeBlock("BATAK");
1966
1967 /**
1968 * Constant for the "Lepcha" Unicode character block.
1969 * @since 1.7
1970 */
1971 public static final UnicodeBlock LEPCHA =
1972 new UnicodeBlock("LEPCHA");
1973
1974 /**
1975 * Constant for the "Ol Chiki" Unicode character block.
1976 * @since 1.7
1977 */
1978 public static final UnicodeBlock OL_CHIKI =
1979 new UnicodeBlock("OL_CHIKI",
1980 "OL CHIKI",
1981 "OLCHIKI");
1982
1983 /**
1984 * Constant for the "Vedic Extensions" Unicode character block.
1985 * @since 1.7
1986 */
1987 public static final UnicodeBlock VEDIC_EXTENSIONS =
1988 new UnicodeBlock("VEDIC_EXTENSIONS",
1989 "VEDIC EXTENSIONS",
1990 "VEDICEXTENSIONS");
1991
1992 /**
1993 * Constant for the "Phonetic Extensions Supplement" Unicode character
1994 * block.
1995 * @since 1.7
1996 */
1997 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1998 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1999 "PHONETIC EXTENSIONS SUPPLEMENT",
2000 "PHONETICEXTENSIONSSUPPLEMENT");
2001
2002 /**
2003 * Constant for the "Combining Diacritical Marks Supplement" Unicode
2004 * character block.
2005 * @since 1.7
2006 */
2007 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
2008 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
2009 "COMBINING DIACRITICAL MARKS SUPPLEMENT",
2010 "COMBININGDIACRITICALMARKSSUPPLEMENT");
2011
2012 /**
2013 * Constant for the "Glagolitic" Unicode character block.
2014 * @since 1.7
2015 */
2016 public static final UnicodeBlock GLAGOLITIC =
2017 new UnicodeBlock("GLAGOLITIC");
2018
2019 /**
2020 * Constant for the "Latin Extended-C" Unicode character block.
2021 * @since 1.7
2022 */
2023 public static final UnicodeBlock LATIN_EXTENDED_C =
2024 new UnicodeBlock("LATIN_EXTENDED_C",
2025 "LATIN EXTENDED-C",
2026 "LATINEXTENDED-C");
2027
2028 /**
2029 * Constant for the "Coptic" Unicode character block.
2030 * @since 1.7
2031 */
2032 public static final UnicodeBlock COPTIC =
2033 new UnicodeBlock("COPTIC");
2034
2035 /**
2036 * Constant for the "Georgian Supplement" Unicode character block.
2037 * @since 1.7
2038 */
2039 public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
2040 new UnicodeBlock("GEORGIAN_SUPPLEMENT",
2041 "GEORGIAN SUPPLEMENT",
2042 "GEORGIANSUPPLEMENT");
2043
2044 /**
2045 * Constant for the "Tifinagh" Unicode character block.
2046 * @since 1.7
2047 */
2048 public static final UnicodeBlock TIFINAGH =
2049 new UnicodeBlock("TIFINAGH");
2050
2051 /**
2052 * Constant for the "Ethiopic Extended" Unicode character block.
2053 * @since 1.7
2054 */
2055 public static final UnicodeBlock ETHIOPIC_EXTENDED =
2056 new UnicodeBlock("ETHIOPIC_EXTENDED",
2057 "ETHIOPIC EXTENDED",
2058 "ETHIOPICEXTENDED");
2059
2060 /**
2061 * Constant for the "Cyrillic Extended-A" Unicode character block.
2062 * @since 1.7
2063 */
2064 public static final UnicodeBlock CYRILLIC_EXTENDED_A =
2065 new UnicodeBlock("CYRILLIC_EXTENDED_A",
2066 "CYRILLIC EXTENDED-A",
2067 "CYRILLICEXTENDED-A");
2068
2069 /**
2070 * Constant for the "Supplemental Punctuation" Unicode character block.
2071 * @since 1.7
2072 */
2073 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
2074 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
2075 "SUPPLEMENTAL PUNCTUATION",
2076 "SUPPLEMENTALPUNCTUATION");
2077
2078 /**
2079 * Constant for the "CJK Strokes" Unicode character block.
2080 * @since 1.7
2081 */
2082 public static final UnicodeBlock CJK_STROKES =
2083 new UnicodeBlock("CJK_STROKES",
2084 "CJK STROKES",
2085 "CJKSTROKES");
2086
2087 /**
2088 * Constant for the "Lisu" Unicode character block.
2089 * @since 1.7
2090 */
2091 public static final UnicodeBlock LISU =
2092 new UnicodeBlock("LISU");
2093
2094 /**
2095 * Constant for the "Vai" Unicode character block.
2096 * @since 1.7
2097 */
2098 public static final UnicodeBlock VAI =
2099 new UnicodeBlock("VAI");
2100
2101 /**
2102 * Constant for the "Cyrillic Extended-B" Unicode character block.
2103 * @since 1.7
2104 */
2105 public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2106 new UnicodeBlock("CYRILLIC_EXTENDED_B",
2107 "CYRILLIC EXTENDED-B",
2108 "CYRILLICEXTENDED-B");
2109
2110 /**
2111 * Constant for the "Bamum" Unicode character block.
2112 * @since 1.7
2113 */
2114 public static final UnicodeBlock BAMUM =
2115 new UnicodeBlock("BAMUM");
2116
2117 /**
2118 * Constant for the "Modifier Tone Letters" Unicode character block.
2119 * @since 1.7
2120 */
2121 public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2122 new UnicodeBlock("MODIFIER_TONE_LETTERS",
2123 "MODIFIER TONE LETTERS",
2124 "MODIFIERTONELETTERS");
2125
2126 /**
2127 * Constant for the "Latin Extended-D" Unicode character block.
2128 * @since 1.7
2129 */
2130 public static final UnicodeBlock LATIN_EXTENDED_D =
2131 new UnicodeBlock("LATIN_EXTENDED_D",
2132 "LATIN EXTENDED-D",
2133 "LATINEXTENDED-D");
2134
2135 /**
2136 * Constant for the "Syloti Nagri" Unicode character block.
2137 * @since 1.7
2138 */
2139 public static final UnicodeBlock SYLOTI_NAGRI =
2140 new UnicodeBlock("SYLOTI_NAGRI",
2141 "SYLOTI NAGRI",
2142 "SYLOTINAGRI");
2143
2144 /**
2145 * Constant for the "Common Indic Number Forms" Unicode character block.
2146 * @since 1.7
2147 */
2148 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2149 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2150 "COMMON INDIC NUMBER FORMS",
2151 "COMMONINDICNUMBERFORMS");
2152
2153 /**
2154 * Constant for the "Phags-pa" Unicode character block.
2155 * @since 1.7
2156 */
2157 public static final UnicodeBlock PHAGS_PA =
2158 new UnicodeBlock("PHAGS_PA",
2159 "PHAGS-PA");
2160
2161 /**
2162 * Constant for the "Saurashtra" Unicode character block.
2163 * @since 1.7
2164 */
2165 public static final UnicodeBlock SAURASHTRA =
2166 new UnicodeBlock("SAURASHTRA");
2167
2168 /**
2169 * Constant for the "Devanagari Extended" Unicode character block.
2170 * @since 1.7
2171 */
2172 public static final UnicodeBlock DEVANAGARI_EXTENDED =
2173 new UnicodeBlock("DEVANAGARI_EXTENDED",
2174 "DEVANAGARI EXTENDED",
2175 "DEVANAGARIEXTENDED");
2176
2177 /**
2178 * Constant for the "Kayah Li" Unicode character block.
2179 * @since 1.7
2180 */
2181 public static final UnicodeBlock KAYAH_LI =
2182 new UnicodeBlock("KAYAH_LI",
2183 "KAYAH LI",
2184 "KAYAHLI");
2185
2186 /**
2187 * Constant for the "Rejang" Unicode character block.
2188 * @since 1.7
2189 */
2190 public static final UnicodeBlock REJANG =
2191 new UnicodeBlock("REJANG");
2192
2193 /**
2194 * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2195 * @since 1.7
2196 */
2197 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2198 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2199 "HANGUL JAMO EXTENDED-A",
2200 "HANGULJAMOEXTENDED-A");
2201
2202 /**
2203 * Constant for the "Javanese" Unicode character block.
2204 * @since 1.7
2205 */
2206 public static final UnicodeBlock JAVANESE =
2207 new UnicodeBlock("JAVANESE");
2208
2209 /**
2210 * Constant for the "Cham" Unicode character block.
2211 * @since 1.7
2212 */
2213 public static final UnicodeBlock CHAM =
2214 new UnicodeBlock("CHAM");
2215
2216 /**
2217 * Constant for the "Myanmar Extended-A" Unicode character block.
2218 * @since 1.7
2219 */
2220 public static final UnicodeBlock MYANMAR_EXTENDED_A =
2221 new UnicodeBlock("MYANMAR_EXTENDED_A",
2222 "MYANMAR EXTENDED-A",
2223 "MYANMAREXTENDED-A");
2224
2225 /**
2226 * Constant for the "Tai Viet" Unicode character block.
2227 * @since 1.7
2228 */
2229 public static final UnicodeBlock TAI_VIET =
2230 new UnicodeBlock("TAI_VIET",
2231 "TAI VIET",
2232 "TAIVIET");
2233
2234 /**
2235 * Constant for the "Ethiopic Extended-A" Unicode character block.
2236 * @since 1.7
2237 */
2238 public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2239 new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2240 "ETHIOPIC EXTENDED-A",
2241 "ETHIOPICEXTENDED-A");
2242
2243 /**
2244 * Constant for the "Meetei Mayek" Unicode character block.
2245 * @since 1.7
2246 */
2247 public static final UnicodeBlock MEETEI_MAYEK =
2248 new UnicodeBlock("MEETEI_MAYEK",
2249 "MEETEI MAYEK",
2250 "MEETEIMAYEK");
2251
2252 /**
2253 * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2254 * @since 1.7
2255 */
2256 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2257 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2258 "HANGUL JAMO EXTENDED-B",
2259 "HANGULJAMOEXTENDED-B");
2260
2261 /**
2262 * Constant for the "Vertical Forms" Unicode character block.
2263 * @since 1.7
2264 */
2265 public static final UnicodeBlock VERTICAL_FORMS =
2266 new UnicodeBlock("VERTICAL_FORMS",
2267 "VERTICAL FORMS",
2268 "VERTICALFORMS");
2269
2270 /**
2271 * Constant for the "Ancient Greek Numbers" Unicode character block.
2272 * @since 1.7
2273 */
2274 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2275 new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2276 "ANCIENT GREEK NUMBERS",
2277 "ANCIENTGREEKNUMBERS");
2278
2279 /**
2280 * Constant for the "Ancient Symbols" Unicode character block.
2281 * @since 1.7
2282 */
2283 public static final UnicodeBlock ANCIENT_SYMBOLS =
2284 new UnicodeBlock("ANCIENT_SYMBOLS",
2285 "ANCIENT SYMBOLS",
2286 "ANCIENTSYMBOLS");
2287
2288 /**
2289 * Constant for the "Phaistos Disc" Unicode character block.
2290 * @since 1.7
2291 */
2292 public static final UnicodeBlock PHAISTOS_DISC =
2293 new UnicodeBlock("PHAISTOS_DISC",
2294 "PHAISTOS DISC",
2295 "PHAISTOSDISC");
2296
2297 /**
2298 * Constant for the "Lycian" Unicode character block.
2299 * @since 1.7
2300 */
2301 public static final UnicodeBlock LYCIAN =
2302 new UnicodeBlock("LYCIAN");
2303
2304 /**
2305 * Constant for the "Carian" Unicode character block.
2306 * @since 1.7
2307 */
2308 public static final UnicodeBlock CARIAN =
2309 new UnicodeBlock("CARIAN");
2310
2311 /**
2312 * Constant for the "Old Persian" Unicode character block.
2313 * @since 1.7
2314 */
2315 public static final UnicodeBlock OLD_PERSIAN =
2316 new UnicodeBlock("OLD_PERSIAN",
2317 "OLD PERSIAN",
2318 "OLDPERSIAN");
2319
2320 /**
2321 * Constant for the "Imperial Aramaic" Unicode character block.
2322 * @since 1.7
2323 */
2324 public static final UnicodeBlock IMPERIAL_ARAMAIC =
2325 new UnicodeBlock("IMPERIAL_ARAMAIC",
2326 "IMPERIAL ARAMAIC",
2327 "IMPERIALARAMAIC");
2328
2329 /**
2330 * Constant for the "Phoenician" Unicode character block.
2331 * @since 1.7
2332 */
2333 public static final UnicodeBlock PHOENICIAN =
2334 new UnicodeBlock("PHOENICIAN");
2335
2336 /**
2337 * Constant for the "Lydian" Unicode character block.
2338 * @since 1.7
2339 */
2340 public static final UnicodeBlock LYDIAN =
2341 new UnicodeBlock("LYDIAN");
2342
2343 /**
2344 * Constant for the "Kharoshthi" Unicode character block.
2345 * @since 1.7
2346 */
2347 public static final UnicodeBlock KHAROSHTHI =
2348 new UnicodeBlock("KHAROSHTHI");
2349
2350 /**
2351 * Constant for the "Old South Arabian" Unicode character block.
2352 * @since 1.7
2353 */
2354 public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2355 new UnicodeBlock("OLD_SOUTH_ARABIAN",
2356 "OLD SOUTH ARABIAN",
2357 "OLDSOUTHARABIAN");
2358
2359 /**
2360 * Constant for the "Avestan" Unicode character block.
2361 * @since 1.7
2362 */
2363 public static final UnicodeBlock AVESTAN =
2364 new UnicodeBlock("AVESTAN");
2365
2366 /**
2367 * Constant for the "Inscriptional Parthian" Unicode character block.
2368 * @since 1.7
2369 */
2370 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2371 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2372 "INSCRIPTIONAL PARTHIAN",
2373 "INSCRIPTIONALPARTHIAN");
2374
2375 /**
2376 * Constant for the "Inscriptional Pahlavi" Unicode character block.
2377 * @since 1.7
2378 */
2379 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2380 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2381 "INSCRIPTIONAL PAHLAVI",
2382 "INSCRIPTIONALPAHLAVI");
2383
2384 /**
2385 * Constant for the "Old Turkic" Unicode character block.
2386 * @since 1.7
2387 */
2388 public static final UnicodeBlock OLD_TURKIC =
2389 new UnicodeBlock("OLD_TURKIC",
2390 "OLD TURKIC",
2391 "OLDTURKIC");
2392
2393 /**
2394 * Constant for the "Rumi Numeral Symbols" Unicode character block.
2395 * @since 1.7
2396 */
2397 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2398 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2399 "RUMI NUMERAL SYMBOLS",
2400 "RUMINUMERALSYMBOLS");
2401
2402 /**
2403 * Constant for the "Brahmi" Unicode character block.
2404 * @since 1.7
2405 */
2406 public static final UnicodeBlock BRAHMI =
2407 new UnicodeBlock("BRAHMI");
2408
2409 /**
2410 * Constant for the "Kaithi" Unicode character block.
2411 * @since 1.7
2412 */
2413 public static final UnicodeBlock KAITHI =
2414 new UnicodeBlock("KAITHI");
2415
2416 /**
2417 * Constant for the "Cuneiform" Unicode character block.
2418 * @since 1.7
2419 */
2420 public static final UnicodeBlock CUNEIFORM =
2421 new UnicodeBlock("CUNEIFORM");
2422
2423 /**
2424 * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2425 * character block.
2426 * @since 1.7
2427 */
2428 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2429 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2430 "CUNEIFORM NUMBERS AND PUNCTUATION",
2431 "CUNEIFORMNUMBERSANDPUNCTUATION");
2432
2433 /**
2434 * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2435 * @since 1.7
2436 */
2437 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2438 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2439 "EGYPTIAN HIEROGLYPHS",
2440 "EGYPTIANHIEROGLYPHS");
2441
2442 /**
2443 * Constant for the "Bamum Supplement" Unicode character block.
2444 * @since 1.7
2445 */
2446 public static final UnicodeBlock BAMUM_SUPPLEMENT =
2447 new UnicodeBlock("BAMUM_SUPPLEMENT",
2448 "BAMUM SUPPLEMENT",
2449 "BAMUMSUPPLEMENT");
2450
2451 /**
2452 * Constant for the "Kana Supplement" Unicode character block.
2453 * @since 1.7
2454 */
2455 public static final UnicodeBlock KANA_SUPPLEMENT =
2456 new UnicodeBlock("KANA_SUPPLEMENT",
2457 "KANA SUPPLEMENT",
2458 "KANASUPPLEMENT");
2459
2460 /**
2461 * Constant for the "Ancient Greek Musical Notation" Unicode character
2462 * block.
2463 * @since 1.7
2464 */
2465 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2466 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2467 "ANCIENT GREEK MUSICAL NOTATION",
2468 "ANCIENTGREEKMUSICALNOTATION");
2469
2470 /**
2471 * Constant for the "Counting Rod Numerals" Unicode character block.
2472 * @since 1.7
2473 */
2474 public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2475 new UnicodeBlock("COUNTING_ROD_NUMERALS",
2476 "COUNTING ROD NUMERALS",
2477 "COUNTINGRODNUMERALS");
2478
2479 /**
2480 * Constant for the "Mahjong Tiles" Unicode character block.
2481 * @since 1.7
2482 */
2483 public static final UnicodeBlock MAHJONG_TILES =
2484 new UnicodeBlock("MAHJONG_TILES",
2485 "MAHJONG TILES",
2486 "MAHJONGTILES");
2487
2488 /**
2489 * Constant for the "Domino Tiles" Unicode character block.
2490 * @since 1.7
2491 */
2492 public static final UnicodeBlock DOMINO_TILES =
2493 new UnicodeBlock("DOMINO_TILES",
2494 "DOMINO TILES",
2495 "DOMINOTILES");
2496
2497 /**
2498 * Constant for the "Playing Cards" Unicode character block.
2499 * @since 1.7
2500 */
2501 public static final UnicodeBlock PLAYING_CARDS =
2502 new UnicodeBlock("PLAYING_CARDS",
2503 "PLAYING CARDS",
2504 "PLAYINGCARDS");
2505
2506 /**
2507 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2508 * block.
2509 * @since 1.7
2510 */
2511 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2512 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2513 "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2514 "ENCLOSEDALPHANUMERICSUPPLEMENT");
2515
2516 /**
2517 * Constant for the "Enclosed Ideographic Supplement" Unicode character
2518 * block.
2519 * @since 1.7
2520 */
2521 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2522 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2523 "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2524 "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2525
2526 /**
2527 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2528 * character block.
2529 * @since 1.7
2530 */
2531 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2532 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2533 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2534 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2535
2536 /**
2537 * Constant for the "Emoticons" Unicode character block.
2538 * @since 1.7
2539 */
2540 public static final UnicodeBlock EMOTICONS =
2541 new UnicodeBlock("EMOTICONS");
2542
2543 /**
2544 * Constant for the "Transport And Map Symbols" Unicode character block.
2545 * @since 1.7
2546 */
2547 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2548 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2549 "TRANSPORT AND MAP SYMBOLS",
2550 "TRANSPORTANDMAPSYMBOLS");
2551
2552 /**
2553 * Constant for the "Alchemical Symbols" Unicode character block.
2554 * @since 1.7
2555 */
2556 public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2557 new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2558 "ALCHEMICAL SYMBOLS",
2559 "ALCHEMICALSYMBOLS");
2560
2561 /**
2562 * Constant for the "CJK Unified Ideographs Extension C" Unicode
2563 * character block.
2564 * @since 1.7
2565 */
2566 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2567 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2568 "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2569 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2570
2571 /**
2572 * Constant for the "CJK Unified Ideographs Extension D" Unicode
2573 * character block.
2574 * @since 1.7
2575 */
2576 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2577 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2578 "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2579 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2580
2581 /**
2582 * Constant for the "Arabic Extended-A" Unicode character block.
2583 * @since 1.8
2584 */
2585 public static final UnicodeBlock ARABIC_EXTENDED_A =
2586 new UnicodeBlock("ARABIC_EXTENDED_A",
2587 "ARABIC EXTENDED-A",
2588 "ARABICEXTENDED-A");
2589
2590 /**
2591 * Constant for the "Sundanese Supplement" Unicode character block.
2592 * @since 1.8
2593 */
2594 public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2595 new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2596 "SUNDANESE SUPPLEMENT",
2597 "SUNDANESESUPPLEMENT");
2598
2599 /**
2600 * Constant for the "Meetei Mayek Extensions" Unicode character block.
2601 * @since 1.8
2602 */
2603 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2604 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2605 "MEETEI MAYEK EXTENSIONS",
2606 "MEETEIMAYEKEXTENSIONS");
2607
2608 /**
2609 * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2610 * @since 1.8
2611 */
2612 public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2613 new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2614 "MEROITIC HIEROGLYPHS",
2615 "MEROITICHIEROGLYPHS");
2616
2617 /**
2618 * Constant for the "Meroitic Cursive" Unicode character block.
2619 * @since 1.8
2620 */
2621 public static final UnicodeBlock MEROITIC_CURSIVE =
2622 new UnicodeBlock("MEROITIC_CURSIVE",
2623 "MEROITIC CURSIVE",
2624 "MEROITICCURSIVE");
2625
2626 /**
2627 * Constant for the "Sora Sompeng" Unicode character block.
2628 * @since 1.8
2629 */
2630 public static final UnicodeBlock SORA_SOMPENG =
2631 new UnicodeBlock("SORA_SOMPENG",
2632 "SORA SOMPENG",
2633 "SORASOMPENG");
2634
2635 /**
2636 * Constant for the "Chakma" Unicode character block.
2637 * @since 1.8
2638 */
2639 public static final UnicodeBlock CHAKMA =
2640 new UnicodeBlock("CHAKMA");
2641
2642 /**
2643 * Constant for the "Sharada" Unicode character block.
2644 * @since 1.8
2645 */
2646 public static final UnicodeBlock SHARADA =
2647 new UnicodeBlock("SHARADA");
2648
2649 /**
2650 * Constant for the "Takri" Unicode character block.
2651 * @since 1.8
2652 */
2653 public static final UnicodeBlock TAKRI =
2654 new UnicodeBlock("TAKRI");
2655
2656 /**
2657 * Constant for the "Miao" Unicode character block.
2658 * @since 1.8
2659 */
2660 public static final UnicodeBlock MIAO =
2661 new UnicodeBlock("MIAO");
2662
2663 /**
2664 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2665 * character block.
2666 * @since 1.8
2667 */
2668 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2669 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2670 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2671 "ARABICMATHEMATICALALPHABETICSYMBOLS");
2672
2673 /**
2674 * Constant for the "Combining Diacritical Marks Extended" Unicode
2675 * character block.
2676 * @since 9
2677 */
2678 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2679 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED",
2680 "COMBINING DIACRITICAL MARKS EXTENDED",
2681 "COMBININGDIACRITICALMARKSEXTENDED");
2682
2683 /**
2684 * Constant for the "Myanmar Extended-B" Unicode character block.
2685 * @since 9
2686 */
2687 public static final UnicodeBlock MYANMAR_EXTENDED_B =
2688 new UnicodeBlock("MYANMAR_EXTENDED_B",
2689 "MYANMAR EXTENDED-B",
2690 "MYANMAREXTENDED-B");
2691
2692 /**
2693 * Constant for the "Latin Extended-E" Unicode character block.
2694 * @since 9
2695 */
2696 public static final UnicodeBlock LATIN_EXTENDED_E =
2697 new UnicodeBlock("LATIN_EXTENDED_E",
2698 "LATIN EXTENDED-E",
2699 "LATINEXTENDED-E");
2700
2701 /**
2702 * Constant for the "Coptic Epact Numbers" Unicode character block.
2703 * @since 9
2704 */
2705 public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2706 new UnicodeBlock("COPTIC_EPACT_NUMBERS",
2707 "COPTIC EPACT NUMBERS",
2708 "COPTICEPACTNUMBERS");
2709
2710 /**
2711 * Constant for the "Old Permic" Unicode character block.
2712 * @since 9
2713 */
2714 public static final UnicodeBlock OLD_PERMIC =
2715 new UnicodeBlock("OLD_PERMIC",
2716 "OLD PERMIC",
2717 "OLDPERMIC");
2718
2719 /**
2720 * Constant for the "Elbasan" Unicode character block.
2721 * @since 9
2722 */
2723 public static final UnicodeBlock ELBASAN =
2724 new UnicodeBlock("ELBASAN");
2725
2726 /**
2727 * Constant for the "Caucasian Albanian" Unicode character block.
2728 * @since 9
2729 */
2730 public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2731 new UnicodeBlock("CAUCASIAN_ALBANIAN",
2732 "CAUCASIAN ALBANIAN",
2733 "CAUCASIANALBANIAN");
2734
2735 /**
2736 * Constant for the "Linear A" Unicode character block.
2737 * @since 9
2738 */
2739 public static final UnicodeBlock LINEAR_A =
2740 new UnicodeBlock("LINEAR_A",
2741 "LINEAR A",
2742 "LINEARA");
2743
2744 /**
2745 * Constant for the "Palmyrene" Unicode character block.
2746 * @since 9
2747 */
2748 public static final UnicodeBlock PALMYRENE =
2749 new UnicodeBlock("PALMYRENE");
2750
2751 /**
2752 * Constant for the "Nabataean" Unicode character block.
2753 * @since 9
2754 */
2755 public static final UnicodeBlock NABATAEAN =
2756 new UnicodeBlock("NABATAEAN");
2757
2758 /**
2759 * Constant for the "Old North Arabian" Unicode character block.
2760 * @since 9
2761 */
2762 public static final UnicodeBlock OLD_NORTH_ARABIAN =
2763 new UnicodeBlock("OLD_NORTH_ARABIAN",
2764 "OLD NORTH ARABIAN",
2765 "OLDNORTHARABIAN");
2766
2767 /**
2768 * Constant for the "Manichaean" Unicode character block.
2769 * @since 9
2770 */
2771 public static final UnicodeBlock MANICHAEAN =
2772 new UnicodeBlock("MANICHAEAN");
2773
2774 /**
2775 * Constant for the "Psalter Pahlavi" Unicode character block.
2776 * @since 9
2777 */
2778 public static final UnicodeBlock PSALTER_PAHLAVI =
2779 new UnicodeBlock("PSALTER_PAHLAVI",
2780 "PSALTER PAHLAVI",
2781 "PSALTERPAHLAVI");
2782
2783 /**
2784 * Constant for the "Mahajani" Unicode character block.
2785 * @since 9
2786 */
2787 public static final UnicodeBlock MAHAJANI =
2788 new UnicodeBlock("MAHAJANI");
2789
2790 /**
2791 * Constant for the "Sinhala Archaic Numbers" Unicode character block.
2792 * @since 9
2793 */
2794 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2795 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS",
2796 "SINHALA ARCHAIC NUMBERS",
2797 "SINHALAARCHAICNUMBERS");
2798
2799 /**
2800 * Constant for the "Khojki" Unicode character block.
2801 * @since 9
2802 */
2803 public static final UnicodeBlock KHOJKI =
2804 new UnicodeBlock("KHOJKI");
2805
2806 /**
2807 * Constant for the "Khudawadi" Unicode character block.
2808 * @since 9
2809 */
2810 public static final UnicodeBlock KHUDAWADI =
2811 new UnicodeBlock("KHUDAWADI");
2812
2813 /**
2814 * Constant for the "Grantha" Unicode character block.
2815 * @since 9
2816 */
2817 public static final UnicodeBlock GRANTHA =
2818 new UnicodeBlock("GRANTHA");
2819
2820 /**
2821 * Constant for the "Tirhuta" Unicode character block.
2822 * @since 9
2823 */
2824 public static final UnicodeBlock TIRHUTA =
2825 new UnicodeBlock("TIRHUTA");
2826
2827 /**
2828 * Constant for the "Siddham" Unicode character block.
2829 * @since 9
2830 */
2831 public static final UnicodeBlock SIDDHAM =
2832 new UnicodeBlock("SIDDHAM");
2833
2834 /**
2835 * Constant for the "Modi" Unicode character block.
2836 * @since 9
2837 */
2838 public static final UnicodeBlock MODI =
2839 new UnicodeBlock("MODI");
2840
2841 /**
2842 * Constant for the "Warang Citi" Unicode character block.
2843 * @since 9
2844 */
2845 public static final UnicodeBlock WARANG_CITI =
2846 new UnicodeBlock("WARANG_CITI",
2847 "WARANG CITI",
2848 "WARANGCITI");
2849
2850 /**
2851 * Constant for the "Pau Cin Hau" Unicode character block.
2852 * @since 9
2853 */
2854 public static final UnicodeBlock PAU_CIN_HAU =
2855 new UnicodeBlock("PAU_CIN_HAU",
2856 "PAU CIN HAU",
2857 "PAUCINHAU");
2858
2859 /**
2860 * Constant for the "Mro" Unicode character block.
2861 * @since 9
2862 */
2863 public static final UnicodeBlock MRO =
2864 new UnicodeBlock("MRO");
2865
2866 /**
2867 * Constant for the "Bassa Vah" Unicode character block.
2868 * @since 9
2869 */
2870 public static final UnicodeBlock BASSA_VAH =
2871 new UnicodeBlock("BASSA_VAH",
2872 "BASSA VAH",
2873 "BASSAVAH");
2874
2875 /**
2876 * Constant for the "Pahawh Hmong" Unicode character block.
2877 * @since 9
2878 */
2879 public static final UnicodeBlock PAHAWH_HMONG =
2880 new UnicodeBlock("PAHAWH_HMONG",
2881 "PAHAWH HMONG",
2882 "PAHAWHHMONG");
2883
2884 /**
2885 * Constant for the "Duployan" Unicode character block.
2886 * @since 9
2887 */
2888 public static final UnicodeBlock DUPLOYAN =
2889 new UnicodeBlock("DUPLOYAN");
2890
2891 /**
2892 * Constant for the "Shorthand Format Controls" Unicode character block.
2893 * @since 9
2894 */
2895 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2896 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS",
2897 "SHORTHAND FORMAT CONTROLS",
2898 "SHORTHANDFORMATCONTROLS");
2899
2900 /**
2901 * Constant for the "Mende Kikakui" Unicode character block.
2902 * @since 9
2903 */
2904 public static final UnicodeBlock MENDE_KIKAKUI =
2905 new UnicodeBlock("MENDE_KIKAKUI",
2906 "MENDE KIKAKUI",
2907 "MENDEKIKAKUI");
2908
2909 /**
2910 * Constant for the "Ornamental Dingbats" Unicode character block.
2911 * @since 9
2912 */
2913 public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2914 new UnicodeBlock("ORNAMENTAL_DINGBATS",
2915 "ORNAMENTAL DINGBATS",
2916 "ORNAMENTALDINGBATS");
2917
2918 /**
2919 * Constant for the "Geometric Shapes Extended" Unicode character block.
2920 * @since 9
2921 */
2922 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2923 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED",
2924 "GEOMETRIC SHAPES EXTENDED",
2925 "GEOMETRICSHAPESEXTENDED");
2926
2927 /**
2928 * Constant for the "Supplemental Arrows-C" Unicode character block.
2929 * @since 9
2930 */
2931 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2932 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C",
2933 "SUPPLEMENTAL ARROWS-C",
2934 "SUPPLEMENTALARROWS-C");
2935
2936 /**
2937 * Constant for the "Cherokee Supplement" Unicode character block.
2938 * @since 9
2939 */
2940 public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2941 new UnicodeBlock("CHEROKEE_SUPPLEMENT",
2942 "CHEROKEE SUPPLEMENT",
2943 "CHEROKEESUPPLEMENT");
2944
2945 /**
2946 * Constant for the "Hatran" Unicode character block.
2947 * @since 9
2948 */
2949 public static final UnicodeBlock HATRAN =
2950 new UnicodeBlock("HATRAN");
2951
2952 /**
2953 * Constant for the "Old Hungarian" Unicode character block.
2954 * @since 9
2955 */
2956 public static final UnicodeBlock OLD_HUNGARIAN =
2957 new UnicodeBlock("OLD_HUNGARIAN",
2958 "OLD HUNGARIAN",
2959 "OLDHUNGARIAN");
2960
2961 /**
2962 * Constant for the "Multani" Unicode character block.
2963 * @since 9
2964 */
2965 public static final UnicodeBlock MULTANI =
2966 new UnicodeBlock("MULTANI");
2967
2968 /**
2969 * Constant for the "Ahom" Unicode character block.
2970 * @since 9
2971 */
2972 public static final UnicodeBlock AHOM =
2973 new UnicodeBlock("AHOM");
2974
2975 /**
2976 * Constant for the "Early Dynastic Cuneiform" Unicode character block.
2977 * @since 9
2978 */
2979 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
2980 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM",
2981 "EARLY DYNASTIC CUNEIFORM",
2982 "EARLYDYNASTICCUNEIFORM");
2983
2984 /**
2985 * Constant for the "Anatolian Hieroglyphs" Unicode character block.
2986 * @since 9
2987 */
2988 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
2989 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS",
2990 "ANATOLIAN HIEROGLYPHS",
2991 "ANATOLIANHIEROGLYPHS");
2992
2993 /**
2994 * Constant for the "Sutton SignWriting" Unicode character block.
2995 * @since 9
2996 */
2997 public static final UnicodeBlock SUTTON_SIGNWRITING =
2998 new UnicodeBlock("SUTTON_SIGNWRITING",
2999 "SUTTON SIGNWRITING",
3000 "SUTTONSIGNWRITING");
3001
3002 /**
3003 * Constant for the "Supplemental Symbols and Pictographs" Unicode
3004 * character block.
3005 * @since 9
3006 */
3007 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
3008 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
3009 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS",
3010 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS");
3011
3012 /**
3013 * Constant for the "CJK Unified Ideographs Extension E" Unicode
3014 * character block.
3015 * @since 9
3016 */
3017 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
3018 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
3019 "CJK UNIFIED IDEOGRAPHS EXTENSION E",
3020 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE");
3021
3022 /**
3023 * Constant for the "Syriac Supplement" Unicode
3024 * character block.
3025 * @since 11
3026 */
3027 public static final UnicodeBlock SYRIAC_SUPPLEMENT =
3028 new UnicodeBlock("SYRIAC_SUPPLEMENT",
3029 "SYRIAC SUPPLEMENT",
3030 "SYRIACSUPPLEMENT");
3031
3032 /**
3033 * Constant for the "Cyrillic Extended-C" Unicode
3034 * character block.
3035 * @since 11
3036 */
3037 public static final UnicodeBlock CYRILLIC_EXTENDED_C =
3038 new UnicodeBlock("CYRILLIC_EXTENDED_C",
3039 "CYRILLIC EXTENDED-C",
3040 "CYRILLICEXTENDED-C");
3041
3042 /**
3043 * Constant for the "Osage" Unicode
3044 * character block.
3045 * @since 11
3046 */
3047 public static final UnicodeBlock OSAGE =
3048 new UnicodeBlock("OSAGE");
3049
3050 /**
3051 * Constant for the "Newa" Unicode
3052 * character block.
3053 * @since 11
3054 */
3055 public static final UnicodeBlock NEWA =
3056 new UnicodeBlock("NEWA");
3057
3058 /**
3059 * Constant for the "Mongolian Supplement" Unicode
3060 * character block.
3061 * @since 11
3062 */
3063 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
3064 new UnicodeBlock("MONGOLIAN_SUPPLEMENT",
3065 "MONGOLIAN SUPPLEMENT",
3066 "MONGOLIANSUPPLEMENT");
3067
3068 /**
3069 * Constant for the "Marchen" Unicode
3070 * character block.
3071 * @since 11
3072 */
3073 public static final UnicodeBlock MARCHEN =
3074 new UnicodeBlock("MARCHEN");
3075
3076 /**
3077 * Constant for the "Ideographic Symbols and Punctuation" Unicode
3078 * character block.
3079 * @since 11
3080 */
3081 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
3082 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION",
3083 "IDEOGRAPHIC SYMBOLS AND PUNCTUATION",
3084 "IDEOGRAPHICSYMBOLSANDPUNCTUATION");
3085
3086 /**
3087 * Constant for the "Tangut" Unicode
3088 * character block.
3089 * @since 11
3090 */
3091 public static final UnicodeBlock TANGUT =
3092 new UnicodeBlock("TANGUT");
3093
3094 /**
3095 * Constant for the "Tangut Components" Unicode
3096 * character block.
3097 * @since 11
3098 */
3099 public static final UnicodeBlock TANGUT_COMPONENTS =
3100 new UnicodeBlock("TANGUT_COMPONENTS",
3101 "TANGUT COMPONENTS",
3102 "TANGUTCOMPONENTS");
3103
3104 /**
3105 * Constant for the "Kana Extended-A" Unicode
3106 * character block.
3107 * @since 11
3108 */
3109 public static final UnicodeBlock KANA_EXTENDED_A =
3110 new UnicodeBlock("KANA_EXTENDED_A",
3111 "KANA EXTENDED-A",
3112 "KANAEXTENDED-A");
3113 /**
3114 * Constant for the "Glagolitic Supplement" Unicode
3115 * character block.
3116 * @since 11
3117 */
3118 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
3119 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT",
3120 "GLAGOLITIC SUPPLEMENT",
3121 "GLAGOLITICSUPPLEMENT");
3122 /**
3123 * Constant for the "Adlam" Unicode
3124 * character block.
3125 * @since 11
3126 */
3127 public static final UnicodeBlock ADLAM =
3128 new UnicodeBlock("ADLAM");
3129
3130 /**
3131 * Constant for the "Masaram Gondi" Unicode
3132 * character block.
3133 * @since 11
3134 */
3135 public static final UnicodeBlock MASARAM_GONDI =
3136 new UnicodeBlock("MASARAM_GONDI",
3137 "MASARAM GONDI",
3138 "MASARAMGONDI");
3139
3140 /**
3141 * Constant for the "Zanabazar Square" Unicode
3142 * character block.
3143 * @since 11
3144 */
3145 public static final UnicodeBlock ZANABAZAR_SQUARE =
3146 new UnicodeBlock("ZANABAZAR_SQUARE",
3147 "ZANABAZAR SQUARE",
3148 "ZANABAZARSQUARE");
3149
3150 /**
3151 * Constant for the "Nushu" Unicode
3152 * character block.
3153 * @since 11
3154 */
3155 public static final UnicodeBlock NUSHU =
3156 new UnicodeBlock("NUSHU");
3157
3158 /**
3159 * Constant for the "Soyombo" Unicode
3160 * character block.
3161 * @since 11
3162 */
3163 public static final UnicodeBlock SOYOMBO =
3164 new UnicodeBlock("SOYOMBO");
3165
3166 /**
3167 * Constant for the "Bhaiksuki" Unicode
3168 * character block.
3169 * @since 11
3170 */
3171 public static final UnicodeBlock BHAIKSUKI =
3172 new UnicodeBlock("BHAIKSUKI");
3173
3174 /**
3175 * Constant for the "CJK Unified Ideographs Extension F" Unicode
3176 * character block.
3177 * @since 11
3178 */
3179 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
3180 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F",
3181 "CJK UNIFIED IDEOGRAPHS EXTENSION F",
3182 "CJKUNIFIEDIDEOGRAPHSEXTENSIONF");
3183 /**
3184 * Constant for the "Georgian Extended" Unicode
3185 * character block.
3186 * @since 12
3187 */
3188 public static final UnicodeBlock GEORGIAN_EXTENDED =
3189 new UnicodeBlock("GEORGIAN_EXTENDED",
3190 "GEORGIAN EXTENDED",
3191 "GEORGIANEXTENDED");
3192
3193 /**
3194 * Constant for the "Hanifi Rohingya" Unicode
3195 * character block.
3196 * @since 12
3197 */
3198 public static final UnicodeBlock HANIFI_ROHINGYA =
3199 new UnicodeBlock("HANIFI_ROHINGYA",
3200 "HANIFI ROHINGYA",
3201 "HANIFIROHINGYA");
3202
3203 /**
3204 * Constant for the "Old Sogdian" Unicode
3205 * character block.
3206 * @since 12
3207 */
3208 public static final UnicodeBlock OLD_SOGDIAN =
3209 new UnicodeBlock("OLD_SOGDIAN",
3210 "OLD SOGDIAN",
3211 "OLDSOGDIAN");
3212
3213 /**
3214 * Constant for the "Sogdian" Unicode
3215 * character block.
3216 * @since 12
3217 */
3218 public static final UnicodeBlock SOGDIAN =
3219 new UnicodeBlock("SOGDIAN");
3220
3221 /**
3222 * Constant for the "Dogra" Unicode
3223 * character block.
3224 * @since 12
3225 */
3226 public static final UnicodeBlock DOGRA =
3227 new UnicodeBlock("DOGRA");
3228
3229 /**
3230 * Constant for the "Gunjala Gondi" Unicode
3231 * character block.
3232 * @since 12
3233 */
3234 public static final UnicodeBlock GUNJALA_GONDI =
3235 new UnicodeBlock("GUNJALA_GONDI",
3236 "GUNJALA GONDI",
3237 "GUNJALAGONDI");
3238
3239 /**
3240 * Constant for the "Makasar" Unicode
3241 * character block.
3242 * @since 12
3243 */
3244 public static final UnicodeBlock MAKASAR =
3245 new UnicodeBlock("MAKASAR");
3246
3247 /**
3248 * Constant for the "Medefaidrin" Unicode
3249 * character block.
3250 * @since 12
3251 */
3252 public static final UnicodeBlock MEDEFAIDRIN =
3253 new UnicodeBlock("MEDEFAIDRIN");
3254
3255 /**
3256 * Constant for the "Mayan Numerals" Unicode
3257 * character block.
3258 * @since 12
3259 */
3260 public static final UnicodeBlock MAYAN_NUMERALS =
3261 new UnicodeBlock("MAYAN_NUMERALS",
3262 "MAYAN NUMERALS",
3263 "MAYANNUMERALS");
3264
3265 /**
3266 * Constant for the "Indic Siyaq Numbers" Unicode
3267 * character block.
3268 * @since 12
3269 */
3270 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
3271 new UnicodeBlock("INDIC_SIYAQ_NUMBERS",
3272 "INDIC SIYAQ NUMBERS",
3273 "INDICSIYAQNUMBERS");
3274
3275 /**
3276 * Constant for the "Chess Symbols" Unicode
3277 * character block.
3278 * @since 12
3279 */
3280 public static final UnicodeBlock CHESS_SYMBOLS =
3281 new UnicodeBlock("CHESS_SYMBOLS",
3282 "CHESS SYMBOLS",
3283 "CHESSSYMBOLS");
3284
3285 /**
3286 * Constant for the "Elymaic" Unicode
3287 * character block.
3288 * @since 13
3289 */
3290 public static final UnicodeBlock ELYMAIC =
3291 new UnicodeBlock("ELYMAIC");
3292
3293 /**
3294 * Constant for the "Nandinagari" Unicode
3295 * character block.
3296 * @since 13
3297 */
3298 public static final UnicodeBlock NANDINAGARI =
3299 new UnicodeBlock("NANDINAGARI");
3300
3301 /**
3302 * Constant for the "Tamil Supplement" Unicode
3303 * character block.
3304 * @since 13
3305 */
3306 public static final UnicodeBlock TAMIL_SUPPLEMENT =
3307 new UnicodeBlock("TAMIL_SUPPLEMENT",
3308 "TAMIL SUPPLEMENT",
3309 "TAMILSUPPLEMENT");
3310
3311 /**
3312 * Constant for the "Egyptian Hieroglyph Format Controls" Unicode
3313 * character block.
3314 * @since 13
3315 */
3316 public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
3317 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS",
3318 "EGYPTIAN HIEROGLYPH FORMAT CONTROLS",
3319 "EGYPTIANHIEROGLYPHFORMATCONTROLS");
3320
3321 /**
3322 * Constant for the "Small Kana Extension" Unicode
3323 * character block.
3324 * @since 13
3325 */
3326 public static final UnicodeBlock SMALL_KANA_EXTENSION =
3327 new UnicodeBlock("SMALL_KANA_EXTENSION",
3328 "SMALL KANA EXTENSION",
3329 "SMALLKANAEXTENSION");
3330
3331 /**
3332 * Constant for the "Nyiakeng Puachue Hmong" Unicode
3333 * character block.
3334 * @since 13
3335 */
3336 public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
3337 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG",
3338 "NYIAKENG PUACHUE HMONG",
3339 "NYIAKENGPUACHUEHMONG");
3340
3341 /**
3342 * Constant for the "Wancho" Unicode
3343 * character block.
3344 * @since 13
3345 */
3346 public static final UnicodeBlock WANCHO =
3347 new UnicodeBlock("WANCHO");
3348
3349 /**
3350 * Constant for the "Ottoman Siyaq Numbers" Unicode
3351 * character block.
3352 * @since 13
3353 */
3354 public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
3355 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS",
3356 "OTTOMAN SIYAQ NUMBERS",
3357 "OTTOMANSIYAQNUMBERS");
3358
3359 /**
3360 * Constant for the "Symbols and Pictographs Extended-A" Unicode
3361 * character block.
3362 * @since 13
3363 */
3364 public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
3365 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A",
3366 "SYMBOLS AND PICTOGRAPHS EXTENDED-A",
3367 "SYMBOLSANDPICTOGRAPHSEXTENDED-A");
3368
3369 /**
3370 * Constant for the "Yezidi" Unicode
3371 * character block.
3372 * @since 15
3373 */
3374 public static final UnicodeBlock YEZIDI =
3375 new UnicodeBlock("YEZIDI");
3376
3377 /**
3378 * Constant for the "Chorasmian" Unicode
3379 * character block.
3380 * @since 15
3381 */
3382 public static final UnicodeBlock CHORASMIAN =
3383 new UnicodeBlock("CHORASMIAN");
3384
3385 /**
3386 * Constant for the "Dives Akuru" Unicode
3387 * character block.
3388 * @since 15
3389 */
3390 public static final UnicodeBlock DIVES_AKURU =
3391 new UnicodeBlock("DIVES_AKURU",
3392 "DIVES AKURU",
3393 "DIVESAKURU");
3394
3395 /**
3396 * Constant for the "Lisu Supplement" Unicode
3397 * character block.
3398 * @since 15
3399 */
3400 public static final UnicodeBlock LISU_SUPPLEMENT =
3401 new UnicodeBlock("LISU_SUPPLEMENT",
3402 "LISU SUPPLEMENT",
3403 "LISUSUPPLEMENT");
3404
3405 /**
3406 * Constant for the "Khitan Small Script" Unicode
3407 * character block.
3408 * @since 15
3409 */
3410 public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
3411 new UnicodeBlock("KHITAN_SMALL_SCRIPT",
3412 "KHITAN SMALL SCRIPT",
3413 "KHITANSMALLSCRIPT");
3414
3415 /**
3416 * Constant for the "Tangut Supplement" Unicode
3417 * character block.
3418 * @since 15
3419 */
3420 public static final UnicodeBlock TANGUT_SUPPLEMENT =
3421 new UnicodeBlock("TANGUT_SUPPLEMENT",
3422 "TANGUT SUPPLEMENT",
3423 "TANGUTSUPPLEMENT");
3424
3425 /**
3426 * Constant for the "Symbols for Legacy Computing" Unicode
3427 * character block.
3428 * @since 15
3429 */
3430 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
3431 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING",
3432 "SYMBOLS FOR LEGACY COMPUTING",
3433 "SYMBOLSFORLEGACYCOMPUTING");
3434
3435 /**
3436 * Constant for the "CJK Unified Ideographs Extension G" Unicode
3437 * character block.
3438 * @since 15
3439 */
3440 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
3441 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
3442 "CJK UNIFIED IDEOGRAPHS EXTENSION G",
3443 "CJKUNIFIEDIDEOGRAPHSEXTENSIONG");
3444
3445 /**
3446 * Constant for the "Arabic Extended-B" Unicode
3447 * character block.
3448 * @since 19
3449 */
3450 public static final UnicodeBlock ARABIC_EXTENDED_B =
3451 new UnicodeBlock("ARABIC_EXTENDED_B",
3452 "ARABIC EXTENDED-B",
3453 "ARABICEXTENDED-B");
3454
3455 /**
3456 * Constant for the "Vithkuqi" Unicode
3457 * character block.
3458 * @since 19
3459 */
3460 public static final UnicodeBlock VITHKUQI =
3461 new UnicodeBlock("VITHKUQI");
3462
3463 /**
3464 * Constant for the "Latin Extended-F" Unicode
3465 * character block.
3466 * @since 19
3467 */
3468 public static final UnicodeBlock LATIN_EXTENDED_F =
3469 new UnicodeBlock("LATIN_EXTENDED_F",
3470 "LATIN EXTENDED-F",
3471 "LATINEXTENDED-F");
3472
3473 /**
3474 * Constant for the "Old Uyghur" Unicode
3475 * character block.
3476 * @since 19
3477 */
3478 public static final UnicodeBlock OLD_UYGHUR =
3479 new UnicodeBlock("OLD_UYGHUR",
3480 "OLD UYGHUR",
3481 "OLDUYGHUR");
3482
3483 /**
3484 * Constant for the "Unified Canadian Aboriginal Syllabics Extended-A" Unicode
3485 * character block.
3486 * @since 19
3487 */
3488 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A =
3489 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A",
3490 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED-A",
3491 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED-A");
3492
3493 /**
3494 * Constant for the "Cypro-Minoan" Unicode
3495 * character block.
3496 * @since 19
3497 */
3498 public static final UnicodeBlock CYPRO_MINOAN =
3499 new UnicodeBlock("CYPRO_MINOAN",
3500 "CYPRO-MINOAN",
3501 "CYPRO-MINOAN");
3502
3503 /**
3504 * Constant for the "Tangsa" Unicode
3505 * character block.
3506 * @since 19
3507 */
3508 public static final UnicodeBlock TANGSA =
3509 new UnicodeBlock("TANGSA");
3510
3511 /**
3512 * Constant for the "Kana Extended-B" Unicode
3513 * character block.
3514 * @since 19
3515 */
3516 public static final UnicodeBlock KANA_EXTENDED_B =
3517 new UnicodeBlock("KANA_EXTENDED_B",
3518 "KANA EXTENDED-B",
3519 "KANAEXTENDED-B");
3520
3521 /**
3522 * Constant for the "Znamenny Musical Notation" Unicode
3523 * character block.
3524 * @since 19
3525 */
3526 public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION =
3527 new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION",
3528 "ZNAMENNY MUSICAL NOTATION",
3529 "ZNAMENNYMUSICALNOTATION");
3530
3531 /**
3532 * Constant for the "Latin Extended-G" Unicode
3533 * character block.
3534 * @since 19
3535 */
3536 public static final UnicodeBlock LATIN_EXTENDED_G =
3537 new UnicodeBlock("LATIN_EXTENDED_G",
3538 "LATIN EXTENDED-G",
3539 "LATINEXTENDED-G");
3540
3541 /**
3542 * Constant for the "Toto" Unicode
3543 * character block.
3544 * @since 19
3545 */
3546 public static final UnicodeBlock TOTO =
3547 new UnicodeBlock("TOTO");
3548
3549 /**
3550 * Constant for the "Ethiopic Extended-B" Unicode
3551 * character block.
3552 * @since 19
3553 */
3554 public static final UnicodeBlock ETHIOPIC_EXTENDED_B =
3555 new UnicodeBlock("ETHIOPIC_EXTENDED_B",
3556 "ETHIOPIC EXTENDED-B",
3557 "ETHIOPICEXTENDED-B");
3558
3559 /**
3560 * Constant for the "Arabic Extended-C" Unicode
3561 * character block.
3562 * @since 20
3563 */
3564 public static final UnicodeBlock ARABIC_EXTENDED_C =
3565 new UnicodeBlock("ARABIC_EXTENDED_C",
3566 "ARABIC EXTENDED-C",
3567 "ARABICEXTENDED-C");
3568
3569 /**
3570 * Constant for the "Devanagari Extended-A" Unicode
3571 * character block.
3572 * @since 20
3573 */
3574 public static final UnicodeBlock DEVANAGARI_EXTENDED_A =
3575 new UnicodeBlock("DEVANAGARI_EXTENDED_A",
3576 "DEVANAGARI EXTENDED-A",
3577 "DEVANAGARIEXTENDED-A");
3578
3579 /**
3580 * Constant for the "Kawi" Unicode
3581 * character block.
3582 * @since 20
3583 */
3584 public static final UnicodeBlock KAWI =
3585 new UnicodeBlock("KAWI");
3586
3587 /**
3588 * Constant for the "Kaktovik Numerals" Unicode
3589 * character block.
3590 * @since 20
3591 */
3592 public static final UnicodeBlock KAKTOVIK_NUMERALS =
3593 new UnicodeBlock("KAKTOVIK_NUMERALS",
3594 "KAKTOVIK NUMERALS",
3595 "KAKTOVIKNUMERALS");
3596
3597 /**
3598 * Constant for the "Cyrillic Extended-D" Unicode
3599 * character block.
3600 * @since 20
3601 */
3602 public static final UnicodeBlock CYRILLIC_EXTENDED_D =
3603 new UnicodeBlock("CYRILLIC_EXTENDED_D",
3604 "CYRILLIC EXTENDED-D",
3605 "CYRILLICEXTENDED-D");
3606
3607 /**
3608 * Constant for the "Nag Mundari" Unicode
3609 * character block.
3610 * @since 20
3611 */
3612 public static final UnicodeBlock NAG_MUNDARI =
3613 new UnicodeBlock("NAG_MUNDARI",
3614 "NAG MUNDARI",
3615 "NAGMUNDARI");
3616
3617 /**
3618 * Constant for the "CJK Unified Ideographs Extension H" Unicode
3619 * character block.
3620 * @since 20
3621 */
3622 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H =
3623 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H",
3624 "CJK UNIFIED IDEOGRAPHS EXTENSION H",
3625 "CJKUNIFIEDIDEOGRAPHSEXTENSIONH");
3626
3627 /**
3628 * Constant for the "CJK Unified Ideographs Extension I" Unicode
3629 * character block.
3630 * @since 22
3631 */
3632 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I =
3633 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I",
3634 "CJK UNIFIED IDEOGRAPHS EXTENSION I",
3635 "CJKUNIFIEDIDEOGRAPHSEXTENSIONI");
3636
3637 /**
3638 * Constant for the "Todhri" Unicode
3639 * character block.
3640 * @since 24
3641 */
3642 public static final UnicodeBlock TODHRI =
3643 new UnicodeBlock("TODHRI");
3644
3645 /**
3646 * Constant for the "Garay" Unicode
3647 * character block.
3648 * @since 24
3649 */
3650 public static final UnicodeBlock GARAY =
3651 new UnicodeBlock("GARAY");
3652
3653 /**
3654 * Constant for the "Tulu-Tigalari" Unicode
3655 * character block.
3656 * @since 24
3657 */
3658 public static final UnicodeBlock TULU_TIGALARI =
3659 new UnicodeBlock("TULU_TIGALARI",
3660 "TULU-TIGALARI");
3661
3662 /**
3663 * Constant for the "Myanmar Extended-C" Unicode
3664 * character block.
3665 * @since 24
3666 */
3667 public static final UnicodeBlock MYANMAR_EXTENDED_C =
3668 new UnicodeBlock("MYANMAR_EXTENDED_C",
3669 "MYANMAR EXTENDED-C",
3670 "MYANMAREXTENDED-C");
3671
3672 /**
3673 * Constant for the "Sunuwar" Unicode
3674 * character block.
3675 * @since 24
3676 */
3677 public static final UnicodeBlock SUNUWAR =
3678 new UnicodeBlock("SUNUWAR");
3679
3680 /**
3681 * Constant for the "Egyptian Hieroglyphs Extended-A" Unicode
3682 * character block.
3683 * @since 24
3684 */
3685 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS_EXTENDED_A =
3686 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS_EXTENDED_A",
3687 "EGYPTIAN HIEROGLYPHS EXTENDED-A",
3688 "EGYPTIANHIEROGLYPHSEXTENDED-A");
3689
3690 /**
3691 * Constant for the "Gurung Khema" Unicode
3692 * character block.
3693 * @since 24
3694 */
3695 public static final UnicodeBlock GURUNG_KHEMA =
3696 new UnicodeBlock("GURUNG_KHEMA",
3697 "GURUNG KHEMA",
3698 "GURUNGKHEMA");
3699
3700 /**
3701 * Constant for the "Kirat Rai" Unicode
3702 * character block.
3703 * @since 24
3704 */
3705 public static final UnicodeBlock KIRAT_RAI =
3706 new UnicodeBlock("KIRAT_RAI",
3707 "KIRAT RAI",
3708 "KIRATRAI");
3709
3710 /**
3711 * Constant for the "Symbols for Legacy Computing Supplement" Unicode
3712 * character block.
3713 * @since 24
3714 */
3715 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT =
3716 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT",
3717 "SYMBOLS FOR LEGACY COMPUTING SUPPLEMENT",
3718 "SYMBOLSFORLEGACYCOMPUTINGSUPPLEMENT");
3719
3720 /**
3721 * Constant for the "Ol Onal" Unicode
3722 * character block.
3723 * @since 24
3724 */
3725 public static final UnicodeBlock OL_ONAL =
3726 new UnicodeBlock("OL_ONAL",
3727 "OL ONAL",
3728 "OLONAL");
3729
3730 /**
3731 * Constant for the "Sidetic" Unicode
3732 * character block.
3733 * @since 26
3734 */
3735 public static final UnicodeBlock SIDETIC =
3736 new UnicodeBlock("SIDETIC");
3737
3738 /**
3739 * Constant for the "Sharada Supplement" Unicode
3740 * character block.
3741 * @since 26
3742 */
3743 public static final UnicodeBlock SHARADA_SUPPLEMENT =
3744 new UnicodeBlock("SHARADA_SUPPLEMENT",
3745 "SHARADA SUPPLEMENT",
3746 "SHARADASUPPLEMENT");
3747
3748 /**
3749 * Constant for the "Tolong Siki" Unicode
3750 * character block.
3751 * @since 26
3752 */
3753 public static final UnicodeBlock TOLONG_SIKI =
3754 new UnicodeBlock("TOLONG_SIKI",
3755 "TOLONG SIKI",
3756 "TOLONGSIKI");
3757
3758 /**
3759 * Constant for the "Beria Erfe" Unicode
3760 * character block.
3761 * @since 26
3762 */
3763 public static final UnicodeBlock BERIA_ERFE =
3764 new UnicodeBlock("BERIA_ERFE",
3765 "BERIA ERFE",
3766 "BERIAERFE");
3767
3768 /**
3769 * Constant for the "Tangut Components Supplement" Unicode
3770 * character block.
3771 * @since 26
3772 */
3773 public static final UnicodeBlock TANGUT_COMPONENTS_SUPPLEMENT =
3774 new UnicodeBlock("TANGUT_COMPONENTS_SUPPLEMENT",
3775 "TANGUT COMPONENTS SUPPLEMENT",
3776 "TANGUTCOMPONENTSSUPPLEMENT");
3777
3778 /**
3779 * Constant for the "Miscellaneous Symbols Supplement" Unicode
3780 * character block.
3781 * @since 26
3782 */
3783 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_SUPPLEMENT =
3784 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_SUPPLEMENT",
3785 "MISCELLANEOUS SYMBOLS SUPPLEMENT",
3786 "MISCELLANEOUSSYMBOLSSUPPLEMENT");
3787
3788 /**
3789 * Constant for the "Tai Yo" Unicode
3790 * character block.
3791 * @since 26
3792 */
3793 public static final UnicodeBlock TAI_YO =
3794 new UnicodeBlock("TAI_YO",
3795 "TAI YO",
3796 "TAIYO");
3797
3798 /**
3799 * Constant for the "CJK Unified Ideographs Extension J" Unicode
3800 * character block.
3801 * @since 26
3802 */
3803 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_J =
3804 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_J",
3805 "CJK UNIFIED IDEOGRAPHS EXTENSION J",
3806 "CJKUNIFIEDIDEOGRAPHSEXTENSIONJ");
3807
3808
3809 private static final int[] blockStarts = {
3810 0x0000, // 0000..007F; Basic Latin
3811 0x0080, // 0080..00FF; Latin-1 Supplement
3812 0x0100, // 0100..017F; Latin Extended-A
3813 0x0180, // 0180..024F; Latin Extended-B
3814 0x0250, // 0250..02AF; IPA Extensions
3815 0x02B0, // 02B0..02FF; Spacing Modifier Letters
3816 0x0300, // 0300..036F; Combining Diacritical Marks
3817 0x0370, // 0370..03FF; Greek and Coptic
3818 0x0400, // 0400..04FF; Cyrillic
3819 0x0500, // 0500..052F; Cyrillic Supplement
3820 0x0530, // 0530..058F; Armenian
3821 0x0590, // 0590..05FF; Hebrew
3822 0x0600, // 0600..06FF; Arabic
3823 0x0700, // 0700..074F; Syriac
3824 0x0750, // 0750..077F; Arabic Supplement
3825 0x0780, // 0780..07BF; Thaana
3826 0x07C0, // 07C0..07FF; NKo
3827 0x0800, // 0800..083F; Samaritan
3828 0x0840, // 0840..085F; Mandaic
3829 0x0860, // 0860..086F; Syriac Supplement
3830 0x0870, // 0870..089F; Arabic Extended-B
3831 0x08A0, // 08A0..08FF; Arabic Extended-A
3832 0x0900, // 0900..097F; Devanagari
3833 0x0980, // 0980..09FF; Bengali
3834 0x0A00, // 0A00..0A7F; Gurmukhi
3835 0x0A80, // 0A80..0AFF; Gujarati
3836 0x0B00, // 0B00..0B7F; Oriya
3837 0x0B80, // 0B80..0BFF; Tamil
3838 0x0C00, // 0C00..0C7F; Telugu
3839 0x0C80, // 0C80..0CFF; Kannada
3840 0x0D00, // 0D00..0D7F; Malayalam
3841 0x0D80, // 0D80..0DFF; Sinhala
3842 0x0E00, // 0E00..0E7F; Thai
3843 0x0E80, // 0E80..0EFF; Lao
3844 0x0F00, // 0F00..0FFF; Tibetan
3845 0x1000, // 1000..109F; Myanmar
3846 0x10A0, // 10A0..10FF; Georgian
3847 0x1100, // 1100..11FF; Hangul Jamo
3848 0x1200, // 1200..137F; Ethiopic
3849 0x1380, // 1380..139F; Ethiopic Supplement
3850 0x13A0, // 13A0..13FF; Cherokee
3851 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics
3852 0x1680, // 1680..169F; Ogham
3853 0x16A0, // 16A0..16FF; Runic
3854 0x1700, // 1700..171F; Tagalog
3855 0x1720, // 1720..173F; Hanunoo
3856 0x1740, // 1740..175F; Buhid
3857 0x1760, // 1760..177F; Tagbanwa
3858 0x1780, // 1780..17FF; Khmer
3859 0x1800, // 1800..18AF; Mongolian
3860 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
3861 0x1900, // 1900..194F; Limbu
3862 0x1950, // 1950..197F; Tai Le
3863 0x1980, // 1980..19DF; New Tai Lue
3864 0x19E0, // 19E0..19FF; Khmer Symbols
3865 0x1A00, // 1A00..1A1F; Buginese
3866 0x1A20, // 1A20..1AAF; Tai Tham
3867 0x1AB0, // 1AB0..1AFF; Combining Diacritical Marks Extended
3868 0x1B00, // 1B00..1B7F; Balinese
3869 0x1B80, // 1B80..1BBF; Sundanese
3870 0x1BC0, // 1BC0..1BFF; Batak
3871 0x1C00, // 1C00..1C4F; Lepcha
3872 0x1C50, // 1C50..1C7F; Ol Chiki
3873 0x1C80, // 1C80..1C8F; Cyrillic Extended-C
3874 0x1C90, // 1C90..1CBF; Georgian Extended
3875 0x1CC0, // 1CC0..1CCF; Sundanese Supplement
3876 0x1CD0, // 1CD0..1CFF; Vedic Extensions
3877 0x1D00, // 1D00..1D7F; Phonetic Extensions
3878 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement
3879 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement
3880 0x1E00, // 1E00..1EFF; Latin Extended Additional
3881 0x1F00, // 1F00..1FFF; Greek Extended
3882 0x2000, // 2000..206F; General Punctuation
3883 0x2070, // 2070..209F; Superscripts and Subscripts
3884 0x20A0, // 20A0..20CF; Currency Symbols
3885 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols
3886 0x2100, // 2100..214F; Letterlike Symbols
3887 0x2150, // 2150..218F; Number Forms
3888 0x2190, // 2190..21FF; Arrows
3889 0x2200, // 2200..22FF; Mathematical Operators
3890 0x2300, // 2300..23FF; Miscellaneous Technical
3891 0x2400, // 2400..243F; Control Pictures
3892 0x2440, // 2440..245F; Optical Character Recognition
3893 0x2460, // 2460..24FF; Enclosed Alphanumerics
3894 0x2500, // 2500..257F; Box Drawing
3895 0x2580, // 2580..259F; Block Elements
3896 0x25A0, // 25A0..25FF; Geometric Shapes
3897 0x2600, // 2600..26FF; Miscellaneous Symbols
3898 0x2700, // 2700..27BF; Dingbats
3899 0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A
3900 0x27F0, // 27F0..27FF; Supplemental Arrows-A
3901 0x2800, // 2800..28FF; Braille Patterns
3902 0x2900, // 2900..297F; Supplemental Arrows-B
3903 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B
3904 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators
3905 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows
3906 0x2C00, // 2C00..2C5F; Glagolitic
3907 0x2C60, // 2C60..2C7F; Latin Extended-C
3908 0x2C80, // 2C80..2CFF; Coptic
3909 0x2D00, // 2D00..2D2F; Georgian Supplement
3910 0x2D30, // 2D30..2D7F; Tifinagh
3911 0x2D80, // 2D80..2DDF; Ethiopic Extended
3912 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A
3913 0x2E00, // 2E00..2E7F; Supplemental Punctuation
3914 0x2E80, // 2E80..2EFF; CJK Radicals Supplement
3915 0x2F00, // 2F00..2FDF; Kangxi Radicals
3916 0x2FE0, // unassigned
3917 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters
3918 0x3000, // 3000..303F; CJK Symbols and Punctuation
3919 0x3040, // 3040..309F; Hiragana
3920 0x30A0, // 30A0..30FF; Katakana
3921 0x3100, // 3100..312F; Bopomofo
3922 0x3130, // 3130..318F; Hangul Compatibility Jamo
3923 0x3190, // 3190..319F; Kanbun
3924 0x31A0, // 31A0..31BF; Bopomofo Extended
3925 0x31C0, // 31C0..31EF; CJK Strokes
3926 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions
3927 0x3200, // 3200..32FF; Enclosed CJK Letters and Months
3928 0x3300, // 3300..33FF; CJK Compatibility
3929 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A
3930 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols
3931 0x4E00, // 4E00..9FFF; CJK Unified Ideographs
3932 0xA000, // A000..A48F; Yi Syllables
3933 0xA490, // A490..A4CF; Yi Radicals
3934 0xA4D0, // A4D0..A4FF; Lisu
3935 0xA500, // A500..A63F; Vai
3936 0xA640, // A640..A69F; Cyrillic Extended-B
3937 0xA6A0, // A6A0..A6FF; Bamum
3938 0xA700, // A700..A71F; Modifier Tone Letters
3939 0xA720, // A720..A7FF; Latin Extended-D
3940 0xA800, // A800..A82F; Syloti Nagri
3941 0xA830, // A830..A83F; Common Indic Number Forms
3942 0xA840, // A840..A87F; Phags-pa
3943 0xA880, // A880..A8DF; Saurashtra
3944 0xA8E0, // A8E0..A8FF; Devanagari Extended
3945 0xA900, // A900..A92F; Kayah Li
3946 0xA930, // A930..A95F; Rejang
3947 0xA960, // A960..A97F; Hangul Jamo Extended-A
3948 0xA980, // A980..A9DF; Javanese
3949 0xA9E0, // A9E0..A9FF; Myanmar Extended-B
3950 0xAA00, // AA00..AA5F; Cham
3951 0xAA60, // AA60..AA7F; Myanmar Extended-A
3952 0xAA80, // AA80..AADF; Tai Viet
3953 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions
3954 0xAB00, // AB00..AB2F; Ethiopic Extended-A
3955 0xAB30, // AB30..AB6F; Latin Extended-E
3956 0xAB70, // AB70..ABBF; Cherokee Supplement
3957 0xABC0, // ABC0..ABFF; Meetei Mayek
3958 0xAC00, // AC00..D7AF; Hangul Syllables
3959 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B
3960 0xD800, // D800..DB7F; High Surrogates
3961 0xDB80, // DB80..DBFF; High Private Use Surrogates
3962 0xDC00, // DC00..DFFF; Low Surrogates
3963 0xE000, // E000..F8FF; Private Use Area
3964 0xF900, // F900..FAFF; CJK Compatibility Ideographs
3965 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms
3966 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A
3967 0xFE00, // FE00..FE0F; Variation Selectors
3968 0xFE10, // FE10..FE1F; Vertical Forms
3969 0xFE20, // FE20..FE2F; Combining Half Marks
3970 0xFE30, // FE30..FE4F; CJK Compatibility Forms
3971 0xFE50, // FE50..FE6F; Small Form Variants
3972 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B
3973 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms
3974 0xFFF0, // FFF0..FFFF; Specials
3975 0x10000, // 10000..1007F; Linear B Syllabary
3976 0x10080, // 10080..100FF; Linear B Ideograms
3977 0x10100, // 10100..1013F; Aegean Numbers
3978 0x10140, // 10140..1018F; Ancient Greek Numbers
3979 0x10190, // 10190..101CF; Ancient Symbols
3980 0x101D0, // 101D0..101FF; Phaistos Disc
3981 0x10200, // unassigned
3982 0x10280, // 10280..1029F; Lycian
3983 0x102A0, // 102A0..102DF; Carian
3984 0x102E0, // 102E0..102FF; Coptic Epact Numbers
3985 0x10300, // 10300..1032F; Old Italic
3986 0x10330, // 10330..1034F; Gothic
3987 0x10350, // 10350..1037F; Old Permic
3988 0x10380, // 10380..1039F; Ugaritic
3989 0x103A0, // 103A0..103DF; Old Persian
3990 0x103E0, // unassigned
3991 0x10400, // 10400..1044F; Deseret
3992 0x10450, // 10450..1047F; Shavian
3993 0x10480, // 10480..104AF; Osmanya
3994 0x104B0, // 104B0..104FF; Osage
3995 0x10500, // 10500..1052F; Elbasan
3996 0x10530, // 10530..1056F; Caucasian Albanian
3997 0x10570, // 10570..105BF; Vithkuqi
3998 0x105C0, // 105C0..105FF; Todhri
3999 0x10600, // 10600..1077F; Linear A
4000 0x10780, // 10780..107BF; Latin Extended-F
4001 0x107C0, // unassigned
4002 0x10800, // 10800..1083F; Cypriot Syllabary
4003 0x10840, // 10840..1085F; Imperial Aramaic
4004 0x10860, // 10860..1087F; Palmyrene
4005 0x10880, // 10880..108AF; Nabataean
4006 0x108B0, // unassigned
4007 0x108E0, // 108E0..108FF; Hatran
4008 0x10900, // 10900..1091F; Phoenician
4009 0x10920, // 10920..1093F; Lydian
4010 0x10940, // 10940..1095F; Sidetic
4011 0x10960, // unassigned
4012 0x10980, // 10980..1099F; Meroitic Hieroglyphs
4013 0x109A0, // 109A0..109FF; Meroitic Cursive
4014 0x10A00, // 10A00..10A5F; Kharoshthi
4015 0x10A60, // 10A60..10A7F; Old South Arabian
4016 0x10A80, // 10A80..10A9F; Old North Arabian
4017 0x10AA0, // unassigned
4018 0x10AC0, // 10AC0..10AFF; Manichaean
4019 0x10B00, // 10B00..10B3F; Avestan
4020 0x10B40, // 10B40..10B5F; Inscriptional Parthian
4021 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi
4022 0x10B80, // 10B80..10BAF; Psalter Pahlavi
4023 0x10BB0, // unassigned
4024 0x10C00, // 10C00..10C4F; Old Turkic
4025 0x10C50, // unassigned
4026 0x10C80, // 10C80..10CFF; Old Hungarian
4027 0x10D00, // 10D00..10D3F; Hanifi Rohingya
4028 0x10D40, // 10D40..10D8F; Garay
4029 0x10D90, // unassigned
4030 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols
4031 0x10E80, // 10E80..10EBF; Yezidi
4032 0x10EC0, // 10EC0..10EFF; Arabic Extended-C
4033 0x10F00, // 10F00..10F2F; Old Sogdian
4034 0x10F30, // 10F30..10F6F; Sogdian
4035 0x10F70, // 10F70..10FAF; Old Uyghur
4036 0x10FB0, // 10FB0..10FDF; Chorasmian
4037 0x10FE0, // 10FE0..10FFF; Elymaic
4038 0x11000, // 11000..1107F; Brahmi
4039 0x11080, // 11080..110CF; Kaithi
4040 0x110D0, // 110D0..110FF; Sora Sompeng
4041 0x11100, // 11100..1114F; Chakma
4042 0x11150, // 11150..1117F; Mahajani
4043 0x11180, // 11180..111DF; Sharada
4044 0x111E0, // 111E0..111FF; Sinhala Archaic Numbers
4045 0x11200, // 11200..1124F; Khojki
4046 0x11250, // unassigned
4047 0x11280, // 11280..112AF; Multani
4048 0x112B0, // 112B0..112FF; Khudawadi
4049 0x11300, // 11300..1137F; Grantha
4050 0x11380, // 11380..113FF; Tulu-Tigalari
4051 0x11400, // 11400..1147F; Newa
4052 0x11480, // 11480..114DF; Tirhuta
4053 0x114E0, // unassigned
4054 0x11580, // 11580..115FF; Siddham
4055 0x11600, // 11600..1165F; Modi
4056 0x11660, // 11660..1167F; Mongolian Supplement
4057 0x11680, // 11680..116CF; Takri
4058 0x116D0, // 116D0..116FF; Myanmar Extended-C
4059 0x11700, // 11700..1174F; Ahom
4060 0x11750, // unassigned
4061 0x11800, // 11800..1184F; Dogra
4062 0x11850, // unassigned
4063 0x118A0, // 118A0..118FF; Warang Citi
4064 0x11900, // 11900..1195F; Dives Akuru
4065 0x11960, // unassigned
4066 0x119A0, // 119A0..119FF; Nandinagari
4067 0x11A00, // 11A00..11A4F; Zanabazar Square
4068 0x11A50, // 11A50..11AAF; Soyombo
4069 0x11AB0, // 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
4070 0x11AC0, // 11AC0..11AFF; Pau Cin Hau
4071 0x11B00, // 11B00..11B5F; Devanagari Extended-A
4072 0x11B60, // 11B60..11B7F; Sharada Supplement
4073 0x11B80, // unassigned
4074 0x11BC0, // 11BC0..11BFF; Sunuwar
4075 0x11C00, // 11C00..11C6F; Bhaiksuki
4076 0x11C70, // 11C70..11CBF; Marchen
4077 0x11CC0, // unassigned
4078 0x11D00, // 11D00..11D5F; Masaram Gondi
4079 0x11D60, // 11D60..11DAF; Gunjala Gondi
4080 0x11DB0, // 11DB0..11DEF; Tolong Siki
4081 0x11DF0, // unassigned
4082 0x11EE0, // 11EE0..11EFF; Makasar
4083 0x11F00, // 11F00..11F5F; Kawi
4084 0x11F60, // unassigned
4085 0x11FB0, // 11FB0..11FBF; Lisu Supplement
4086 0x11FC0, // 11FC0..11FFF; Tamil Supplement
4087 0x12000, // 12000..123FF; Cuneiform
4088 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation
4089 0x12480, // 12480..1254F; Early Dynastic Cuneiform
4090 0x12550, // unassigned
4091 0x12F90, // 12F90..12FFF; Cypro-Minoan
4092 0x13000, // 13000..1342F; Egyptian Hieroglyphs
4093 0x13430, // 13430..1345F; Egyptian Hieroglyph Format Controls
4094 0x13460, // 13460..143FF; Egyptian Hieroglyphs Extended-A
4095 0x14400, // 14400..1467F; Anatolian Hieroglyphs
4096 0x14680, // unassigned
4097 0x16100, // 16100..1613F; Gurung Khema
4098 0x16140, // unassigned
4099 0x16800, // 16800..16A3F; Bamum Supplement
4100 0x16A40, // 16A40..16A6F; Mro
4101 0x16A70, // 16A70..16ACF; Tangsa
4102 0x16AD0, // 16AD0..16AFF; Bassa Vah
4103 0x16B00, // 16B00..16B8F; Pahawh Hmong
4104 0x16B90, // unassigned
4105 0x16D40, // 16D40..16D7F; Kirat Rai
4106 0x16D80, // unassigned
4107 0x16E40, // 16E40..16E9F; Medefaidrin
4108 0x16EA0, // 16EA0..16EDF; Beria Erfe
4109 0x16EE0, // unassigned
4110 0x16F00, // 16F00..16F9F; Miao
4111 0x16FA0, // unassigned
4112 0x16FE0, // 16FE0..16FFF; Ideographic Symbols and Punctuation
4113 0x17000, // 17000..187FF; Tangut
4114 0x18800, // 18800..18AFF; Tangut Components
4115 0x18B00, // 18B00..18CFF; Khitan Small Script
4116 0x18D00, // 18D00..18D7F; Tangut Supplement
4117 0x18D80, // 18D80..18DFF; Tangut Components Supplement
4118 0x18E00, // unassigned
4119 0x1AFF0, // 1AFF0..1AFFF; Kana Extended-B
4120 0x1B000, // 1B000..1B0FF; Kana Supplement
4121 0x1B100, // 1B100..1B12F; Kana Extended-A
4122 0x1B130, // 1B130..1B16F; Small Kana Extension
4123 0x1B170, // 1B170..1B2FF; Nushu
4124 0x1B300, // unassigned
4125 0x1BC00, // 1BC00..1BC9F; Duployan
4126 0x1BCA0, // 1BCA0..1BCAF; Shorthand Format Controls
4127 0x1BCB0, // unassigned
4128 0x1CC00, // 1CC00..1CEBF; Symbols for Legacy Computing Supplement
4129 0x1CEC0, // 1CEC0..1CEFF; Miscellaneous Symbols Supplement
4130 0x1CF00, // 1CF00..1CFCF; Znamenny Musical Notation
4131 0x1CFD0, // unassigned
4132 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols
4133 0x1D100, // 1D100..1D1FF; Musical Symbols
4134 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation
4135 0x1D250, // unassigned
4136 0x1D2C0, // 1D2C0..1D2DF; Kaktovik Numerals
4137 0x1D2E0, // 1D2E0..1D2FF; Mayan Numerals
4138 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols
4139 0x1D360, // 1D360..1D37F; Counting Rod Numerals
4140 0x1D380, // unassigned
4141 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols
4142 0x1D800, // 1D800..1DAAF; Sutton SignWriting
4143 0x1DAB0, // unassigned
4144 0x1DF00, // 1DF00..1DFFF; Latin Extended-G
4145 0x1E000, // 1E000..1E02F; Glagolitic Supplement
4146 0x1E030, // 1E030..1E08F; Cyrillic Extended-D
4147 0x1E090, // unassigned
4148 0x1E100, // 1E100..1E14F; Nyiakeng Puachue Hmong
4149 0x1E150, // unassigned
4150 0x1E290, // 1E290..1E2BF; Toto
4151 0x1E2C0, // 1E2C0..1E2FF; Wancho
4152 0x1E300, // unassigned
4153 0x1E4D0, // 1E4D0..1E4FF; Nag Mundari
4154 0x1E500, // unassigned
4155 0x1E5D0, // 1E5D0..1E5FF; Ol Onal
4156 0x1E600, // unassigned
4157 0x1E6C0, // 1E6C0..1E6FF; Tai Yo
4158 0x1E700, // unassigned
4159 0x1E7E0, // 1E7E0..1E7FF; Ethiopic Extended-B
4160 0x1E800, // 1E800..1E8DF; Mende Kikakui
4161 0x1E8E0, // unassigned
4162 0x1E900, // 1E900..1E95F; Adlam
4163 0x1E960, // unassigned
4164 0x1EC70, // 1EC70..1ECBF; Indic Siyaq Numbers
4165 0x1ECC0, // unassigned
4166 0x1ED00, // 1ED00..1ED4F; Ottoman Siyaq Numbers
4167 0x1ED50, // unassigned
4168 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
4169 0x1EF00, // unassigned
4170 0x1F000, // 1F000..1F02F; Mahjong Tiles
4171 0x1F030, // 1F030..1F09F; Domino Tiles
4172 0x1F0A0, // 1F0A0..1F0FF; Playing Cards
4173 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement
4174 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement
4175 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
4176 0x1F600, // 1F600..1F64F; Emoticons
4177 0x1F650, // 1F650..1F67F; Ornamental Dingbats
4178 0x1F680, // 1F680..1F6FF; Transport and Map Symbols
4179 0x1F700, // 1F700..1F77F; Alchemical Symbols
4180 0x1F780, // 1F780..1F7FF; Geometric Shapes Extended
4181 0x1F800, // 1F800..1F8FF; Supplemental Arrows-C
4182 0x1F900, // 1F900..1F9FF; Supplemental Symbols and Pictographs
4183 0x1FA00, // 1FA00..1FA6F; Chess Symbols
4184 0x1FA70, // 1FA70..1FAFF; Symbols and Pictographs Extended-A
4185 0x1FB00, // 1FB00..1FBFF; Symbols for Legacy Computing
4186 0x1FC00, // unassigned
4187 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B
4188 0x2A6E0, // unassigned
4189 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C
4190 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D
4191 0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E
4192 0x2CEB0, // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
4193 0x2EBF0, // 2EBF0..2EE5F; CJK Unified Ideographs Extension I
4194 0x2EE60, // unassigned
4195 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
4196 0x2FA20, // unassigned
4197 0x30000, // 30000..3134F; CJK Unified Ideographs Extension G
4198 0x31350, // 31350..323AF; CJK Unified Ideographs Extension H
4199 0x323B0, // 323B0..3347F; CJK Unified Ideographs Extension J
4200 0x33480, // unassigned
4201 0xE0000, // E0000..E007F; Tags
4202 0xE0080, // unassigned
4203 0xE0100, // E0100..E01EF; Variation Selectors Supplement
4204 0xE01F0, // unassigned
4205 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A
4206 0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
4207 };
4208
// Table of UnicodeBlock constants indexed in parallel with blockStarts:
// of(int) locates an index in blockStarts for a code point and returns the
// element of this array at that same index.
// A null element marks a range that belongs to no defined block (the ranges
// commented as "unassigned" in blockStarts); of(int) returns null for any
// code point that falls in such a range.
4209 private static final UnicodeBlock[] blocks = {
4210 BASIC_LATIN,
4211 LATIN_1_SUPPLEMENT,
4212 LATIN_EXTENDED_A,
4213 LATIN_EXTENDED_B,
4214 IPA_EXTENSIONS,
4215 SPACING_MODIFIER_LETTERS,
4216 COMBINING_DIACRITICAL_MARKS,
4217 GREEK,
4218 CYRILLIC,
4219 CYRILLIC_SUPPLEMENTARY,
4220 ARMENIAN,
4221 HEBREW,
4222 ARABIC,
4223 SYRIAC,
4224 ARABIC_SUPPLEMENT,
4225 THAANA,
4226 NKO,
4227 SAMARITAN,
4228 MANDAIC,
4229 SYRIAC_SUPPLEMENT,
4230 ARABIC_EXTENDED_B,
4231 ARABIC_EXTENDED_A,
4232 DEVANAGARI,
4233 BENGALI,
4234 GURMUKHI,
4235 GUJARATI,
4236 ORIYA,
4237 TAMIL,
4238 TELUGU,
4239 KANNADA,
4240 MALAYALAM,
4241 SINHALA,
4242 THAI,
4243 LAO,
4244 TIBETAN,
4245 MYANMAR,
4246 GEORGIAN,
4247 HANGUL_JAMO,
4248 ETHIOPIC,
4249 ETHIOPIC_SUPPLEMENT,
4250 CHEROKEE,
4251 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
4252 OGHAM,
4253 RUNIC,
4254 TAGALOG,
4255 HANUNOO,
4256 BUHID,
4257 TAGBANWA,
4258 KHMER,
4259 MONGOLIAN,
4260 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
4261 LIMBU,
4262 TAI_LE,
4263 NEW_TAI_LUE,
4264 KHMER_SYMBOLS,
4265 BUGINESE,
4266 TAI_THAM,
4267 COMBINING_DIACRITICAL_MARKS_EXTENDED,
4268 BALINESE,
4269 SUNDANESE,
4270 BATAK,
4271 LEPCHA,
4272 OL_CHIKI,
4273 CYRILLIC_EXTENDED_C,
4274 GEORGIAN_EXTENDED,
4275 SUNDANESE_SUPPLEMENT,
4276 VEDIC_EXTENSIONS,
4277 PHONETIC_EXTENSIONS,
4278 PHONETIC_EXTENSIONS_SUPPLEMENT,
4279 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
4280 LATIN_EXTENDED_ADDITIONAL,
4281 GREEK_EXTENDED,
4282 GENERAL_PUNCTUATION,
4283 SUPERSCRIPTS_AND_SUBSCRIPTS,
4284 CURRENCY_SYMBOLS,
4285 COMBINING_MARKS_FOR_SYMBOLS,
4286 LETTERLIKE_SYMBOLS,
4287 NUMBER_FORMS,
4288 ARROWS,
4289 MATHEMATICAL_OPERATORS,
4290 MISCELLANEOUS_TECHNICAL,
4291 CONTROL_PICTURES,
4292 OPTICAL_CHARACTER_RECOGNITION,
4293 ENCLOSED_ALPHANUMERICS,
4294 BOX_DRAWING,
4295 BLOCK_ELEMENTS,
4296 GEOMETRIC_SHAPES,
4297 MISCELLANEOUS_SYMBOLS,
4298 DINGBATS,
4299 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
4300 SUPPLEMENTAL_ARROWS_A,
4301 BRAILLE_PATTERNS,
4302 SUPPLEMENTAL_ARROWS_B,
4303 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
4304 SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
4305 MISCELLANEOUS_SYMBOLS_AND_ARROWS,
4306 GLAGOLITIC,
4307 LATIN_EXTENDED_C,
4308 COPTIC,
4309 GEORGIAN_SUPPLEMENT,
4310 TIFINAGH,
4311 ETHIOPIC_EXTENDED,
4312 CYRILLIC_EXTENDED_A,
4313 SUPPLEMENTAL_PUNCTUATION,
4314 CJK_RADICALS_SUPPLEMENT,
4315 KANGXI_RADICALS,
4316 null,
4317 IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
4318 CJK_SYMBOLS_AND_PUNCTUATION,
4319 HIRAGANA,
4320 KATAKANA,
4321 BOPOMOFO,
4322 HANGUL_COMPATIBILITY_JAMO,
4323 KANBUN,
4324 BOPOMOFO_EXTENDED,
4325 CJK_STROKES,
4326 KATAKANA_PHONETIC_EXTENSIONS,
4327 ENCLOSED_CJK_LETTERS_AND_MONTHS,
4328 CJK_COMPATIBILITY,
4329 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
4330 YIJING_HEXAGRAM_SYMBOLS,
4331 CJK_UNIFIED_IDEOGRAPHS,
4332 YI_SYLLABLES,
4333 YI_RADICALS,
4334 LISU,
4335 VAI,
4336 CYRILLIC_EXTENDED_B,
4337 BAMUM,
4338 MODIFIER_TONE_LETTERS,
4339 LATIN_EXTENDED_D,
4340 SYLOTI_NAGRI,
4341 COMMON_INDIC_NUMBER_FORMS,
4342 PHAGS_PA,
4343 SAURASHTRA,
4344 DEVANAGARI_EXTENDED,
4345 KAYAH_LI,
4346 REJANG,
4347 HANGUL_JAMO_EXTENDED_A,
4348 JAVANESE,
4349 MYANMAR_EXTENDED_B,
4350 CHAM,
4351 MYANMAR_EXTENDED_A,
4352 TAI_VIET,
4353 MEETEI_MAYEK_EXTENSIONS,
4354 ETHIOPIC_EXTENDED_A,
4355 LATIN_EXTENDED_E,
4356 CHEROKEE_SUPPLEMENT,
4357 MEETEI_MAYEK,
4358 HANGUL_SYLLABLES,
4359 HANGUL_JAMO_EXTENDED_B,
4360 HIGH_SURROGATES,
4361 HIGH_PRIVATE_USE_SURROGATES,
4362 LOW_SURROGATES,
4363 PRIVATE_USE_AREA,
4364 CJK_COMPATIBILITY_IDEOGRAPHS,
4365 ALPHABETIC_PRESENTATION_FORMS,
4366 ARABIC_PRESENTATION_FORMS_A,
4367 VARIATION_SELECTORS,
4368 VERTICAL_FORMS,
4369 COMBINING_HALF_MARKS,
4370 CJK_COMPATIBILITY_FORMS,
4371 SMALL_FORM_VARIANTS,
4372 ARABIC_PRESENTATION_FORMS_B,
4373 HALFWIDTH_AND_FULLWIDTH_FORMS,
4374 SPECIALS,
4375 LINEAR_B_SYLLABARY,
4376 LINEAR_B_IDEOGRAMS,
4377 AEGEAN_NUMBERS,
4378 ANCIENT_GREEK_NUMBERS,
4379 ANCIENT_SYMBOLS,
4380 PHAISTOS_DISC,
4381 null,
4382 LYCIAN,
4383 CARIAN,
4384 COPTIC_EPACT_NUMBERS,
4385 OLD_ITALIC,
4386 GOTHIC,
4387 OLD_PERMIC,
4388 UGARITIC,
4389 OLD_PERSIAN,
4390 null,
4391 DESERET,
4392 SHAVIAN,
4393 OSMANYA,
4394 OSAGE,
4395 ELBASAN,
4396 CAUCASIAN_ALBANIAN,
4397 VITHKUQI,
4398 TODHRI,
4399 LINEAR_A,
4400 LATIN_EXTENDED_F,
4401 null,
4402 CYPRIOT_SYLLABARY,
4403 IMPERIAL_ARAMAIC,
4404 PALMYRENE,
4405 NABATAEAN,
4406 null,
4407 HATRAN,
4408 PHOENICIAN,
4409 LYDIAN,
4410 SIDETIC,
4411 null,
4412 MEROITIC_HIEROGLYPHS,
4413 MEROITIC_CURSIVE,
4414 KHAROSHTHI,
4415 OLD_SOUTH_ARABIAN,
4416 OLD_NORTH_ARABIAN,
4417 null,
4418 MANICHAEAN,
4419 AVESTAN,
4420 INSCRIPTIONAL_PARTHIAN,
4421 INSCRIPTIONAL_PAHLAVI,
4422 PSALTER_PAHLAVI,
4423 null,
4424 OLD_TURKIC,
4425 null,
4426 OLD_HUNGARIAN,
4427 HANIFI_ROHINGYA,
4428 GARAY,
4429 null,
4430 RUMI_NUMERAL_SYMBOLS,
4431 YEZIDI,
4432 ARABIC_EXTENDED_C,
4433 OLD_SOGDIAN,
4434 SOGDIAN,
4435 OLD_UYGHUR,
4436 CHORASMIAN,
4437 ELYMAIC,
4438 BRAHMI,
4439 KAITHI,
4440 SORA_SOMPENG,
4441 CHAKMA,
4442 MAHAJANI,
4443 SHARADA,
4444 SINHALA_ARCHAIC_NUMBERS,
4445 KHOJKI,
4446 null,
4447 MULTANI,
4448 KHUDAWADI,
4449 GRANTHA,
4450 TULU_TIGALARI,
4451 NEWA,
4452 TIRHUTA,
4453 null,
4454 SIDDHAM,
4455 MODI,
4456 MONGOLIAN_SUPPLEMENT,
4457 TAKRI,
4458 MYANMAR_EXTENDED_C,
4459 AHOM,
4460 null,
4461 DOGRA,
4462 null,
4463 WARANG_CITI,
4464 DIVES_AKURU,
4465 null,
4466 NANDINAGARI,
4467 ZANABAZAR_SQUARE,
4468 SOYOMBO,
4469 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A,
4470 PAU_CIN_HAU,
4471 DEVANAGARI_EXTENDED_A,
4472 SHARADA_SUPPLEMENT,
4473 null,
4474 SUNUWAR,
4475 BHAIKSUKI,
4476 MARCHEN,
4477 null,
4478 MASARAM_GONDI,
4479 GUNJALA_GONDI,
4480 TOLONG_SIKI,
4481 null,
4482 MAKASAR,
4483 KAWI,
4484 null,
4485 LISU_SUPPLEMENT,
4486 TAMIL_SUPPLEMENT,
4487 CUNEIFORM,
4488 CUNEIFORM_NUMBERS_AND_PUNCTUATION,
4489 EARLY_DYNASTIC_CUNEIFORM,
4490 null,
4491 CYPRO_MINOAN,
4492 EGYPTIAN_HIEROGLYPHS,
4493 EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS,
4494 EGYPTIAN_HIEROGLYPHS_EXTENDED_A,
4495 ANATOLIAN_HIEROGLYPHS,
4496 null,
4497 GURUNG_KHEMA,
4498 null,
4499 BAMUM_SUPPLEMENT,
4500 MRO,
4501 TANGSA,
4502 BASSA_VAH,
4503 PAHAWH_HMONG,
4504 null,
4505 KIRAT_RAI,
4506 null,
4507 MEDEFAIDRIN,
4508 BERIA_ERFE,
4509 null,
4510 MIAO,
4511 null,
4512 IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
4513 TANGUT,
4514 TANGUT_COMPONENTS,
4515 KHITAN_SMALL_SCRIPT,
4516 TANGUT_SUPPLEMENT,
4517 TANGUT_COMPONENTS_SUPPLEMENT,
4518 null,
4519 KANA_EXTENDED_B,
4520 KANA_SUPPLEMENT,
4521 KANA_EXTENDED_A,
4522 SMALL_KANA_EXTENSION,
4523 NUSHU,
4524 null,
4525 DUPLOYAN,
4526 SHORTHAND_FORMAT_CONTROLS,
4527 null,
4528 SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT,
4529 MISCELLANEOUS_SYMBOLS_SUPPLEMENT,
4530 ZNAMENNY_MUSICAL_NOTATION,
4531 null,
4532 BYZANTINE_MUSICAL_SYMBOLS,
4533 MUSICAL_SYMBOLS,
4534 ANCIENT_GREEK_MUSICAL_NOTATION,
4535 null,
4536 KAKTOVIK_NUMERALS,
4537 MAYAN_NUMERALS,
4538 TAI_XUAN_JING_SYMBOLS,
4539 COUNTING_ROD_NUMERALS,
4540 null,
4541 MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
4542 SUTTON_SIGNWRITING,
4543 null,
4544 LATIN_EXTENDED_G,
4545 GLAGOLITIC_SUPPLEMENT,
4546 CYRILLIC_EXTENDED_D,
4547 null,
4548 NYIAKENG_PUACHUE_HMONG,
4549 null,
4550 TOTO,
4551 WANCHO,
4552 null,
4553 NAG_MUNDARI,
4554 null,
4555 OL_ONAL,
4556 null,
4557 TAI_YO,
4558 null,
4559 ETHIOPIC_EXTENDED_B,
4560 MENDE_KIKAKUI,
4561 null,
4562 ADLAM,
4563 null,
4564 INDIC_SIYAQ_NUMBERS,
4565 null,
4566 OTTOMAN_SIYAQ_NUMBERS,
4567 null,
4568 ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
4569 null,
4570 MAHJONG_TILES,
4571 DOMINO_TILES,
4572 PLAYING_CARDS,
4573 ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
4574 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
4575 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
4576 EMOTICONS,
4577 ORNAMENTAL_DINGBATS,
4578 TRANSPORT_AND_MAP_SYMBOLS,
4579 ALCHEMICAL_SYMBOLS,
4580 GEOMETRIC_SHAPES_EXTENDED,
4581 SUPPLEMENTAL_ARROWS_C,
4582 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
4583 CHESS_SYMBOLS,
4584 SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A,
4585 SYMBOLS_FOR_LEGACY_COMPUTING,
4586 null,
4587 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
4588 null,
4589 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
4590 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
4591 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
4592 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
4593 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I,
4594 null,
4595 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
4596 null,
4597 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G,
4598 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H,
4599 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_J,
4600 null,
4601 TAGS,
4602 null,
4603 VARIATION_SELECTORS_SUPPLEMENT,
4604 null,
4605 SUPPLEMENTARY_PRIVATE_USE_AREA_A,
4606 SUPPLEMENTARY_PRIVATE_USE_AREA_B,
4607 };
4608
4609
4610 /**
4611 * Returns the object representing the Unicode block containing the
4612 * given character, or {@code null} if the character is not a
4613 * member of a defined block.
4614 *
4615 * <p><b>Note:</b> This method cannot handle
4616 * <a href="Character.html#supplementary"> supplementary
4617 * characters</a>. To support all Unicode characters, including
4618 * supplementary characters, use the {@link #of(int)} method.
4619 *
4620 * @param c The character in question
4621 * @return The {@code UnicodeBlock} instance representing the
4622 * Unicode block of which this character is a member, or
4623 * {@code null} if the character is not a member of any
4624 * Unicode block
4625 */
4626 public static UnicodeBlock of(char c) {
4627 return of((int)c);
4628 }
4629
4630 /**
4631 * Returns the object representing the Unicode block
4632 * containing the given character (Unicode code point), or
4633 * {@code null} if the character is not a member of a
4634 * defined block.
4635 *
4636 * @param codePoint the character (Unicode code point) in question.
4637 * @return The {@code UnicodeBlock} instance representing the
4638 * Unicode block of which this character is a member, or
4639 * {@code null} if the character is not a member of any
4640 * Unicode block
4641 * @throws IllegalArgumentException if the specified
4642 * {@code codePoint} is an invalid Unicode code point.
4643 * @see Character#isValidCodePoint(int)
4644 * @since 1.5
4645 */
4646 public static UnicodeBlock of(int codePoint) {
4647 if (!isValidCodePoint(codePoint)) {
4648 throw new IllegalArgumentException(
4649 String.format("Not a valid Unicode code point: 0x%X", codePoint));
4650 }
4651
4652 int top, bottom, current;
4653 bottom = 0;
4654 top = blockStarts.length;
4655 current = top/2;
4656
4657 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
4658 while (top - bottom > 1) {
4659 if (codePoint >= blockStarts[current]) {
4660 bottom = current;
4661 } else {
4662 top = current;
4663 }
4664 current = (top + bottom) / 2;
4665 }
4666 return blocks[current];
4667 }
4668
4669 /**
4670 * Returns the UnicodeBlock with the given name. Block
4671 * names are determined by The Unicode Standard. The file
4672 * {@code Blocks.txt} defines blocks for a particular
4673 * version of the standard. The {@link Character} class specifies
4674 * the version of the standard that it supports.
4675 * <p>
4676 * This method accepts block names in the following forms:
4677 * <ol>
4678 * <li> Canonical block names as defined by the Unicode Standard.
4679 * For example, the standard defines a "Basic Latin" block. Therefore, this
4680 * method accepts "Basic Latin" as a valid block name. The documentation of
4681 * each UnicodeBlock provides the canonical name.
4682 * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
4683 * is a valid block name for the "Basic Latin" block.
4684 * <li>The text representation of each constant UnicodeBlock identifier.
4685 * For example, this method will return the {@link #BASIC_LATIN} block if
4686 * provided with the "BASIC_LATIN" name. This form replaces all spaces and
4687 * hyphens in the canonical name with underscores.
4688 * </ol>
4689 * Finally, character case is ignored for all of the valid block name forms.
4690 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
4691 * The en_US locale's case mapping rules are used to provide case-insensitive
4692 * string comparisons for block name validation.
4693 * <p>
4694 * If the Unicode Standard changes block names, both the previous and
4695 * current names will be accepted.
4696 *
4697 * @param blockName A {@code UnicodeBlock} name.
4698 * @return The {@code UnicodeBlock} instance identified
4699 * by {@code blockName}
4700 * @throws IllegalArgumentException if {@code blockName} is an
4701 * invalid name
4702 * @throws NullPointerException if {@code blockName} is null
4703 * @since 1.5
4704 */
4705 public static final UnicodeBlock forName(String blockName) {
4706 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
4707 if (block == null) {
4708 throw new IllegalArgumentException("Not a valid block name: "
4709 + blockName);
4710 }
4711 return block;
4712 }
4713 }
4714
4715
4716 /**
4717 * A family of character subsets representing the character scripts
4718 * defined in the <a href="https://www.unicode.org/reports/tr24/">
4719 * <i>Unicode Standard Annex #24: Unicode Script Property</i></a>. Every Unicode
4720 * character is assigned to a single Unicode script, either a specific
4721 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
4722 * one of the following three special values,
4723 * {@link Character.UnicodeScript#INHERITED Inherited},
4724 * {@link Character.UnicodeScript#COMMON Common} or
4725 * {@link Character.UnicodeScript#UNKNOWN Unknown}.
4726 *
4727 * @spec https://www.unicode.org/reports/tr24 Unicode Script Property
4728 * @since 1.7
4729 */
4730 public static enum UnicodeScript {
4731
4732 /**
4733 * Unicode script "Common".
4734 */
4735 COMMON,
4736
4737 /**
4738 * Unicode script "Latin".
4739 */
4740 LATIN,
4741
4742 /**
4743 * Unicode script "Greek".
4744 */
4745 GREEK,
4746
4747 /**
4748 * Unicode script "Cyrillic".
4749 */
4750 CYRILLIC,
4751
4752 /**
4753 * Unicode script "Armenian".
4754 */
4755 ARMENIAN,
4756
4757 /**
4758 * Unicode script "Hebrew".
4759 */
4760 HEBREW,
4761
4762 /**
4763 * Unicode script "Arabic".
4764 */
4765 ARABIC,
4766
4767 /**
4768 * Unicode script "Syriac".
4769 */
4770 SYRIAC,
4771
4772 /**
4773 * Unicode script "Thaana".
4774 */
4775 THAANA,
4776
4777 /**
4778 * Unicode script "Devanagari".
4779 */
4780 DEVANAGARI,
4781
4782 /**
4783 * Unicode script "Bengali".
4784 */
4785 BENGALI,
4786
4787 /**
4788 * Unicode script "Gurmukhi".
4789 */
4790 GURMUKHI,
4791
4792 /**
4793 * Unicode script "Gujarati".
4794 */
4795 GUJARATI,
4796
4797 /**
4798 * Unicode script "Oriya".
4799 */
4800 ORIYA,
4801
4802 /**
4803 * Unicode script "Tamil".
4804 */
4805 TAMIL,
4806
4807 /**
4808 * Unicode script "Telugu".
4809 */
4810 TELUGU,
4811
4812 /**
4813 * Unicode script "Kannada".
4814 */
4815 KANNADA,
4816
4817 /**
4818 * Unicode script "Malayalam".
4819 */
4820 MALAYALAM,
4821
4822 /**
4823 * Unicode script "Sinhala".
4824 */
4825 SINHALA,
4826
4827 /**
4828 * Unicode script "Thai".
4829 */
4830 THAI,
4831
4832 /**
4833 * Unicode script "Lao".
4834 */
4835 LAO,
4836
4837 /**
4838 * Unicode script "Tibetan".
4839 */
4840 TIBETAN,
4841
4842 /**
4843 * Unicode script "Myanmar".
4844 */
4845 MYANMAR,
4846
4847 /**
4848 * Unicode script "Georgian".
4849 */
4850 GEORGIAN,
4851
4852 /**
4853 * Unicode script "Hangul".
4854 */
4855 HANGUL,
4856
4857 /**
4858 * Unicode script "Ethiopic".
4859 */
4860 ETHIOPIC,
4861
4862 /**
4863 * Unicode script "Cherokee".
4864 */
4865 CHEROKEE,
4866
4867 /**
4868 * Unicode script "Canadian_Aboriginal".
4869 */
4870 CANADIAN_ABORIGINAL,
4871
4872 /**
4873 * Unicode script "Ogham".
4874 */
4875 OGHAM,
4876
4877 /**
4878 * Unicode script "Runic".
4879 */
4880 RUNIC,
4881
4882 /**
4883 * Unicode script "Khmer".
4884 */
4885 KHMER,
4886
4887 /**
4888 * Unicode script "Mongolian".
4889 */
4890 MONGOLIAN,
4891
4892 /**
4893 * Unicode script "Hiragana".
4894 */
4895 HIRAGANA,
4896
4897 /**
4898 * Unicode script "Katakana".
4899 */
4900 KATAKANA,
4901
4902 /**
4903 * Unicode script "Bopomofo".
4904 */
4905 BOPOMOFO,
4906
4907 /**
4908 * Unicode script "Han".
4909 */
4910 HAN,
4911
4912 /**
4913 * Unicode script "Yi".
4914 */
4915 YI,
4916
4917 /**
4918 * Unicode script "Old_Italic".
4919 */
4920 OLD_ITALIC,
4921
4922 /**
4923 * Unicode script "Gothic".
4924 */
4925 GOTHIC,
4926
4927 /**
4928 * Unicode script "Deseret".
4929 */
4930 DESERET,
4931
4932 /**
4933 * Unicode script "Inherited".
4934 */
4935 INHERITED,
4936
4937 /**
4938 * Unicode script "Tagalog".
4939 */
4940 TAGALOG,
4941
4942 /**
4943 * Unicode script "Hanunoo".
4944 */
4945 HANUNOO,
4946
4947 /**
4948 * Unicode script "Buhid".
4949 */
4950 BUHID,
4951
4952 /**
4953 * Unicode script "Tagbanwa".
4954 */
4955 TAGBANWA,
4956
4957 /**
4958 * Unicode script "Limbu".
4959 */
4960 LIMBU,
4961
4962 /**
4963 * Unicode script "Tai_Le".
4964 */
4965 TAI_LE,
4966
4967 /**
4968 * Unicode script "Linear_B".
4969 */
4970 LINEAR_B,
4971
4972 /**
4973 * Unicode script "Ugaritic".
4974 */
4975 UGARITIC,
4976
4977 /**
4978 * Unicode script "Shavian".
4979 */
4980 SHAVIAN,
4981
4982 /**
4983 * Unicode script "Osmanya".
4984 */
4985 OSMANYA,
4986
4987 /**
4988 * Unicode script "Cypriot".
4989 */
4990 CYPRIOT,
4991
4992 /**
4993 * Unicode script "Braille".
4994 */
4995 BRAILLE,
4996
4997 /**
4998 * Unicode script "Buginese".
4999 */
5000 BUGINESE,
5001
5002 /**
5003 * Unicode script "Coptic".
5004 */
5005 COPTIC,
5006
5007 /**
5008 * Unicode script "New_Tai_Lue".
5009 */
5010 NEW_TAI_LUE,
5011
5012 /**
5013 * Unicode script "Glagolitic".
5014 */
5015 GLAGOLITIC,
5016
5017 /**
5018 * Unicode script "Tifinagh".
5019 */
5020 TIFINAGH,
5021
5022 /**
5023 * Unicode script "Syloti_Nagri".
5024 */
5025 SYLOTI_NAGRI,
5026
5027 /**
5028 * Unicode script "Old_Persian".
5029 */
5030 OLD_PERSIAN,
5031
5032 /**
5033 * Unicode script "Kharoshthi".
5034 */
5035 KHAROSHTHI,
5036
5037 /**
5038 * Unicode script "Balinese".
5039 */
5040 BALINESE,
5041
5042 /**
5043 * Unicode script "Cuneiform".
5044 */
5045 CUNEIFORM,
5046
5047 /**
5048 * Unicode script "Phoenician".
5049 */
5050 PHOENICIAN,
5051
5052 /**
5053 * Unicode script "Phags_Pa".
5054 */
5055 PHAGS_PA,
5056
5057 /**
5058 * Unicode script "Nko".
5059 */
5060 NKO,
5061
5062 /**
5063 * Unicode script "Sundanese".
5064 */
5065 SUNDANESE,
5066
5067 /**
5068 * Unicode script "Batak".
5069 */
5070 BATAK,
5071
5072 /**
5073 * Unicode script "Lepcha".
5074 */
5075 LEPCHA,
5076
5077 /**
5078 * Unicode script "Ol_Chiki".
5079 */
5080 OL_CHIKI,
5081
5082 /**
5083 * Unicode script "Vai".
5084 */
5085 VAI,
5086
5087 /**
5088 * Unicode script "Saurashtra".
5089 */
5090 SAURASHTRA,
5091
5092 /**
5093 * Unicode script "Kayah_Li".
5094 */
5095 KAYAH_LI,
5096
5097 /**
5098 * Unicode script "Rejang".
5099 */
5100 REJANG,
5101
5102 /**
5103 * Unicode script "Lycian".
5104 */
5105 LYCIAN,
5106
5107 /**
5108 * Unicode script "Carian".
5109 */
5110 CARIAN,
5111
5112 /**
5113 * Unicode script "Lydian".
5114 */
5115 LYDIAN,
5116
5117 /**
5118 * Unicode script "Cham".
5119 */
5120 CHAM,
5121
5122 /**
5123 * Unicode script "Tai_Tham".
5124 */
5125 TAI_THAM,
5126
5127 /**
5128 * Unicode script "Tai_Viet".
5129 */
5130 TAI_VIET,
5131
5132 /**
5133 * Unicode script "Avestan".
5134 */
5135 AVESTAN,
5136
5137 /**
5138 * Unicode script "Egyptian_Hieroglyphs".
5139 */
5140 EGYPTIAN_HIEROGLYPHS,
5141
5142 /**
5143 * Unicode script "Samaritan".
5144 */
5145 SAMARITAN,
5146
5147 /**
5148 * Unicode script "Mandaic".
5149 */
5150 MANDAIC,
5151
5152 /**
5153 * Unicode script "Lisu".
5154 */
5155 LISU,
5156
5157 /**
5158 * Unicode script "Bamum".
5159 */
5160 BAMUM,
5161
5162 /**
5163 * Unicode script "Javanese".
5164 */
5165 JAVANESE,
5166
5167 /**
5168 * Unicode script "Meetei_Mayek".
5169 */
5170 MEETEI_MAYEK,
5171
5172 /**
5173 * Unicode script "Imperial_Aramaic".
5174 */
5175 IMPERIAL_ARAMAIC,
5176
5177 /**
5178 * Unicode script "Old_South_Arabian".
5179 */
5180 OLD_SOUTH_ARABIAN,
5181
5182 /**
5183 * Unicode script "Inscriptional_Parthian".
5184 */
5185 INSCRIPTIONAL_PARTHIAN,
5186
5187 /**
5188 * Unicode script "Inscriptional_Pahlavi".
5189 */
5190 INSCRIPTIONAL_PAHLAVI,
5191
5192 /**
5193 * Unicode script "Old_Turkic".
5194 */
5195 OLD_TURKIC,
5196
5197 /**
5198 * Unicode script "Brahmi".
5199 */
5200 BRAHMI,
5201
5202 /**
5203 * Unicode script "Kaithi".
5204 */
5205 KAITHI,
5206
5207 /**
5208 * Unicode script "Meroitic Hieroglyphs".
5209 * @since 1.8
5210 */
5211 MEROITIC_HIEROGLYPHS,
5212
5213 /**
5214 * Unicode script "Meroitic Cursive".
5215 * @since 1.8
5216 */
5217 MEROITIC_CURSIVE,
5218
5219 /**
5220 * Unicode script "Sora Sompeng".
5221 * @since 1.8
5222 */
5223 SORA_SOMPENG,
5224
5225 /**
5226 * Unicode script "Chakma".
5227 * @since 1.8
5228 */
5229 CHAKMA,
5230
5231 /**
5232 * Unicode script "Sharada".
5233 * @since 1.8
5234 */
5235 SHARADA,
5236
5237 /**
5238 * Unicode script "Takri".
5239 * @since 1.8
5240 */
5241 TAKRI,
5242
5243 /**
5244 * Unicode script "Miao".
5245 * @since 1.8
5246 */
5247 MIAO,
5248
5249 /**
5250 * Unicode script "Caucasian Albanian".
5251 * @since 9
5252 */
5253 CAUCASIAN_ALBANIAN,
5254
5255 /**
5256 * Unicode script "Bassa Vah".
5257 * @since 9
5258 */
5259 BASSA_VAH,
5260
5261 /**
5262 * Unicode script "Duployan".
5263 * @since 9
5264 */
5265 DUPLOYAN,
5266
5267 /**
5268 * Unicode script "Elbasan".
5269 * @since 9
5270 */
5271 ELBASAN,
5272
5273 /**
5274 * Unicode script "Grantha".
5275 * @since 9
5276 */
5277 GRANTHA,
5278
5279 /**
5280 * Unicode script "Pahawh Hmong".
5281 * @since 9
5282 */
5283 PAHAWH_HMONG,
5284
5285 /**
5286 * Unicode script "Khojki".
5287 * @since 9
5288 */
5289 KHOJKI,
5290
5291 /**
5292 * Unicode script "Linear A".
5293 * @since 9
5294 */
5295 LINEAR_A,
5296
5297 /**
5298 * Unicode script "Mahajani".
5299 * @since 9
5300 */
5301 MAHAJANI,
5302
5303 /**
5304 * Unicode script "Manichaean".
5305 * @since 9
5306 */
5307 MANICHAEAN,
5308
5309 /**
5310 * Unicode script "Mende Kikakui".
5311 * @since 9
5312 */
5313 MENDE_KIKAKUI,
5314
5315 /**
5316 * Unicode script "Modi".
5317 * @since 9
5318 */
5319 MODI,
5320
5321 /**
5322 * Unicode script "Mro".
5323 * @since 9
5324 */
5325 MRO,
5326
5327 /**
5328 * Unicode script "Old North Arabian".
5329 * @since 9
5330 */
5331 OLD_NORTH_ARABIAN,
5332
5333 /**
5334 * Unicode script "Nabataean".
5335 * @since 9
5336 */
5337 NABATAEAN,
5338
5339 /**
5340 * Unicode script "Palmyrene".
5341 * @since 9
5342 */
5343 PALMYRENE,
5344
5345 /**
5346 * Unicode script "Pau Cin Hau".
5347 * @since 9
5348 */
5349 PAU_CIN_HAU,
5350
5351 /**
5352 * Unicode script "Old Permic".
5353 * @since 9
5354 */
5355 OLD_PERMIC,
5356
5357 /**
5358 * Unicode script "Psalter Pahlavi".
5359 * @since 9
5360 */
5361 PSALTER_PAHLAVI,
5362
5363 /**
5364 * Unicode script "Siddham".
5365 * @since 9
5366 */
5367 SIDDHAM,
5368
5369 /**
5370 * Unicode script "Khudawadi".
5371 * @since 9
5372 */
5373 KHUDAWADI,
5374
5375 /**
5376 * Unicode script "Tirhuta".
5377 * @since 9
5378 */
5379 TIRHUTA,
5380
5381 /**
5382 * Unicode script "Warang Citi".
5383 * @since 9
5384 */
5385 WARANG_CITI,
5386
5387 /**
5388 * Unicode script "Ahom".
5389 * @since 9
5390 */
5391 AHOM,
5392
5393 /**
5394 * Unicode script "Anatolian Hieroglyphs".
5395 * @since 9
5396 */
5397 ANATOLIAN_HIEROGLYPHS,
5398
5399 /**
5400 * Unicode script "Hatran".
5401 * @since 9
5402 */
5403 HATRAN,
5404
5405 /**
5406 * Unicode script "Multani".
5407 * @since 9
5408 */
5409 MULTANI,
5410
5411 /**
5412 * Unicode script "Old Hungarian".
5413 * @since 9
5414 */
5415 OLD_HUNGARIAN,
5416
5417 /**
5418 * Unicode script "SignWriting".
5419 * @since 9
5420 */
5421 SIGNWRITING,
5422
5423 /**
5424 * Unicode script "Adlam".
5425 * @since 11
5426 */
5427 ADLAM,
5428
5429 /**
5430 * Unicode script "Bhaiksuki".
5431 * @since 11
5432 */
5433 BHAIKSUKI,
5434
5435 /**
5436 * Unicode script "Marchen".
5437 * @since 11
5438 */
5439 MARCHEN,
5440
5441 /**
5442 * Unicode script "Newa".
5443 * @since 11
5444 */
5445 NEWA,
5446
5447 /**
5448 * Unicode script "Osage".
5449 * @since 11
5450 */
5451 OSAGE,
5452
5453 /**
5454 * Unicode script "Tangut".
5455 * @since 11
5456 */
5457 TANGUT,
5458
5459 /**
5460 * Unicode script "Masaram Gondi".
5461 * @since 11
5462 */
5463 MASARAM_GONDI,
5464
5465 /**
5466 * Unicode script "Nushu".
5467 * @since 11
5468 */
5469 NUSHU,
5470
5471 /**
5472 * Unicode script "Soyombo".
5473 * @since 11
5474 */
5475 SOYOMBO,
5476
5477 /**
5478 * Unicode script "Zanabazar Square".
5479 * @since 11
5480 */
5481 ZANABAZAR_SQUARE,
5482
5483 /**
5484 * Unicode script "Hanifi Rohingya".
5485 * @since 12
5486 */
5487 HANIFI_ROHINGYA,
5488
5489 /**
5490 * Unicode script "Old Sogdian".
5491 * @since 12
5492 */
5493 OLD_SOGDIAN,
5494
5495 /**
5496 * Unicode script "Sogdian".
5497 * @since 12
5498 */
5499 SOGDIAN,
5500
5501 /**
5502 * Unicode script "Dogra".
5503 * @since 12
5504 */
5505 DOGRA,
5506
5507 /**
5508 * Unicode script "Gunjala Gondi".
5509 * @since 12
5510 */
5511 GUNJALA_GONDI,
5512
5513 /**
5514 * Unicode script "Makasar".
5515 * @since 12
5516 */
5517 MAKASAR,
5518
5519 /**
5520 * Unicode script "Medefaidrin".
5521 * @since 12
5522 */
5523 MEDEFAIDRIN,
5524
5525 /**
5526 * Unicode script "Elymaic".
5527 * @since 13
5528 */
5529 ELYMAIC,
5530
5531 /**
5532 * Unicode script "Nandinagari".
5533 * @since 13
5534 */
5535 NANDINAGARI,
5536
5537 /**
5538 * Unicode script "Nyiakeng Puachue Hmong".
5539 * @since 13
5540 */
5541 NYIAKENG_PUACHUE_HMONG,
5542
5543 /**
5544 * Unicode script "Wancho".
5545 * @since 13
5546 */
5547 WANCHO,
5548
5549 /**
5550 * Unicode script "Yezidi".
5551 * @since 15
5552 */
5553 YEZIDI,
5554
5555 /**
5556 * Unicode script "Chorasmian".
5557 * @since 15
5558 */
5559 CHORASMIAN,
5560
5561 /**
5562 * Unicode script "Dives Akuru".
5563 * @since 15
5564 */
5565 DIVES_AKURU,
5566
5567 /**
5568 * Unicode script "Khitan Small Script".
5569 * @since 15
5570 */
5571 KHITAN_SMALL_SCRIPT,
5572
5573 /**
5574 * Unicode script "Vithkuqi".
5575 * @since 19
5576 */
5577 VITHKUQI,
5578
5579 /**
5580 * Unicode script "Old Uyghur".
5581 * @since 19
5582 */
5583 OLD_UYGHUR,
5584
5585 /**
5586 * Unicode script "Cypro Minoan".
5587 * @since 19
5588 */
5589 CYPRO_MINOAN,
5590
5591 /**
5592 * Unicode script "Tangsa".
5593 * @since 19
5594 */
5595 TANGSA,
5596
5597 /**
5598 * Unicode script "Toto".
5599 * @since 19
5600 */
5601 TOTO,
5602
5603 /**
5604 * Unicode script "Kawi".
5605 * @since 20
5606 */
5607 KAWI,
5608
5609 /**
5610 * Unicode script "Nag Mundari".
5611 * @since 20
5612 */
5613 NAG_MUNDARI,
5614
5615 /**
5616 * Unicode script "Todhri".
5617 * @since 24
5618 */
5619 TODHRI,
5620
5621 /**
5622 * Unicode script "Garay".
5623 * @since 24
5624 */
5625 GARAY,
5626
5627 /**
5628 * Unicode script "Tulu Tigalari".
5629 * @since 24
5630 */
5631 TULU_TIGALARI,
5632
5633 /**
5634 * Unicode script "Sunuwar".
5635 * @since 24
5636 */
5637 SUNUWAR,
5638
5639 /**
5640 * Unicode script "Gurung Khema".
5641 * @since 24
5642 */
5643 GURUNG_KHEMA,
5644
5645 /**
5646 * Unicode script "Kirat Rai".
5647 * @since 24
5648 */
5649 KIRAT_RAI,
5650
5651 /**
5652 * Unicode script "Ol Onal".
5653 * @since 24
5654 */
5655 OL_ONAL,
5656
5657 /**
5658 * Unicode script "Sidetic".
5659 * @since 26
5660 */
5661 SIDETIC,
5662
5663 /**
5664 * Unicode script "Tolong Siki".
5665 * @since 26
5666 */
5667 TOLONG_SIKI,
5668
5669 /**
5670 * Unicode script "Beria Erfe".
5671 * @since 26
5672 */
5673 BERIA_ERFE,
5674
5675 /**
5676 * Unicode script "Tai Yo".
5677 * @since 26
5678 */
5679 TAI_YO,
5680
5681 /**
5682 * Unicode script "Unknown".
5683 */
5684 UNKNOWN; // must be the last enum constant for calculating the size of "aliases" hash map.
5685
5686 private static final int[] scriptStarts = {
5687 0x0000, // 0000..0040; COMMON
5688 0x0041, // 0041..005A; LATIN
5689 0x005B, // 005B..0060; COMMON
5690 0x0061, // 0061..007A; LATIN
5691 0x007B, // 007B..00A9; COMMON
5692 0x00AA, // 00AA ; LATIN
5693 0x00AB, // 00AB..00B9; COMMON
5694 0x00BA, // 00BA ; LATIN
5695 0x00BB, // 00BB..00BF; COMMON
5696 0x00C0, // 00C0..00D6; LATIN
5697 0x00D7, // 00D7 ; COMMON
5698 0x00D8, // 00D8..00F6; LATIN
5699 0x00F7, // 00F7 ; COMMON
5700 0x00F8, // 00F8..02B8; LATIN
5701 0x02B9, // 02B9..02DF; COMMON
5702 0x02E0, // 02E0..02E4; LATIN
5703 0x02E5, // 02E5..02E9; COMMON
5704 0x02EA, // 02EA..02EB; BOPOMOFO
5705 0x02EC, // 02EC..02FF; COMMON
5706 0x0300, // 0300..036F; INHERITED
5707 0x0370, // 0370..0373; GREEK
5708 0x0374, // 0374 ; COMMON
5709 0x0375, // 0375..0377; GREEK
5710 0x0378, // 0378..0379; UNKNOWN
5711 0x037A, // 037A..037D; GREEK
5712 0x037E, // 037E ; COMMON
5713 0x037F, // 037F ; GREEK
5714 0x0380, // 0380..0383; UNKNOWN
5715 0x0384, // 0384 ; GREEK
5716 0x0385, // 0385 ; COMMON
5717 0x0386, // 0386 ; GREEK
5718 0x0387, // 0387 ; COMMON
5719 0x0388, // 0388..038A; GREEK
5720 0x038B, // 038B ; UNKNOWN
5721 0x038C, // 038C ; GREEK
5722 0x038D, // 038D ; UNKNOWN
5723 0x038E, // 038E..03A1; GREEK
5724 0x03A2, // 03A2 ; UNKNOWN
5725 0x03A3, // 03A3..03E1; GREEK
5726 0x03E2, // 03E2..03EF; COPTIC
5727 0x03F0, // 03F0..03FF; GREEK
5728 0x0400, // 0400..0484; CYRILLIC
5729 0x0485, // 0485..0486; INHERITED
5730 0x0487, // 0487..052F; CYRILLIC
5731 0x0530, // 0530 ; UNKNOWN
5732 0x0531, // 0531..0556; ARMENIAN
5733 0x0557, // 0557..0558; UNKNOWN
5734 0x0559, // 0559..058A; ARMENIAN
5735 0x058B, // 058B..058C; UNKNOWN
5736 0x058D, // 058D..058F; ARMENIAN
5737 0x0590, // 0590 ; UNKNOWN
5738 0x0591, // 0591..05C7; HEBREW
5739 0x05C8, // 05C8..05CF; UNKNOWN
5740 0x05D0, // 05D0..05EA; HEBREW
5741 0x05EB, // 05EB..05EE; UNKNOWN
5742 0x05EF, // 05EF..05F4; HEBREW
5743 0x05F5, // 05F5..05FF; UNKNOWN
5744 0x0600, // 0600..0604; ARABIC
5745 0x0605, // 0605 ; COMMON
5746 0x0606, // 0606..060B; ARABIC
5747 0x060C, // 060C ; COMMON
5748 0x060D, // 060D..061A; ARABIC
5749 0x061B, // 061B ; COMMON
5750 0x061C, // 061C..061E; ARABIC
5751 0x061F, // 061F ; COMMON
5752 0x0620, // 0620..063F; ARABIC
5753 0x0640, // 0640 ; COMMON
5754 0x0641, // 0641..064A; ARABIC
5755 0x064B, // 064B..0655; INHERITED
5756 0x0656, // 0656..066F; ARABIC
5757 0x0670, // 0670 ; INHERITED
5758 0x0671, // 0671..06DC; ARABIC
5759 0x06DD, // 06DD ; COMMON
5760 0x06DE, // 06DE..06FF; ARABIC
5761 0x0700, // 0700..070D; SYRIAC
5762 0x070E, // 070E ; UNKNOWN
5763 0x070F, // 070F..074A; SYRIAC
5764 0x074B, // 074B..074C; UNKNOWN
5765 0x074D, // 074D..074F; SYRIAC
5766 0x0750, // 0750..077F; ARABIC
5767 0x0780, // 0780..07B1; THAANA
5768 0x07B2, // 07B2..07BF; UNKNOWN
5769 0x07C0, // 07C0..07FA; NKO
5770 0x07FB, // 07FB..07FC; UNKNOWN
5771 0x07FD, // 07FD..07FF; NKO
5772 0x0800, // 0800..082D; SAMARITAN
5773 0x082E, // 082E..082F; UNKNOWN
5774 0x0830, // 0830..083E; SAMARITAN
5775 0x083F, // 083F ; UNKNOWN
5776 0x0840, // 0840..085B; MANDAIC
5777 0x085C, // 085C..085D; UNKNOWN
5778 0x085E, // 085E ; MANDAIC
5779 0x085F, // 085F ; UNKNOWN
5780 0x0860, // 0860..086A; SYRIAC
5781 0x086B, // 086B..086F; UNKNOWN
5782 0x0870, // 0870..0891; ARABIC
5783 0x0892, // 0892..0896; UNKNOWN
5784 0x0897, // 0897..08E1; ARABIC
5785 0x08E2, // 08E2 ; COMMON
5786 0x08E3, // 08E3..08FF; ARABIC
5787 0x0900, // 0900..0950; DEVANAGARI
5788 0x0951, // 0951..0954; INHERITED
5789 0x0955, // 0955..0963; DEVANAGARI
5790 0x0964, // 0964..0965; COMMON
5791 0x0966, // 0966..097F; DEVANAGARI
5792 0x0980, // 0980..0983; BENGALI
5793 0x0984, // 0984 ; UNKNOWN
5794 0x0985, // 0985..098C; BENGALI
5795 0x098D, // 098D..098E; UNKNOWN
5796 0x098F, // 098F..0990; BENGALI
5797 0x0991, // 0991..0992; UNKNOWN
5798 0x0993, // 0993..09A8; BENGALI
5799 0x09A9, // 09A9 ; UNKNOWN
5800 0x09AA, // 09AA..09B0; BENGALI
5801 0x09B1, // 09B1 ; UNKNOWN
5802 0x09B2, // 09B2 ; BENGALI
5803 0x09B3, // 09B3..09B5; UNKNOWN
5804 0x09B6, // 09B6..09B9; BENGALI
5805 0x09BA, // 09BA..09BB; UNKNOWN
5806 0x09BC, // 09BC..09C4; BENGALI
5807 0x09C5, // 09C5..09C6; UNKNOWN
5808 0x09C7, // 09C7..09C8; BENGALI
5809 0x09C9, // 09C9..09CA; UNKNOWN
5810 0x09CB, // 09CB..09CE; BENGALI
5811 0x09CF, // 09CF..09D6; UNKNOWN
5812 0x09D7, // 09D7 ; BENGALI
5813 0x09D8, // 09D8..09DB; UNKNOWN
5814 0x09DC, // 09DC..09DD; BENGALI
5815 0x09DE, // 09DE ; UNKNOWN
5816 0x09DF, // 09DF..09E3; BENGALI
5817 0x09E4, // 09E4..09E5; UNKNOWN
5818 0x09E6, // 09E6..09FE; BENGALI
5819 0x09FF, // 09FF..0A00; UNKNOWN
5820 0x0A01, // 0A01..0A03; GURMUKHI
5821 0x0A04, // 0A04 ; UNKNOWN
5822 0x0A05, // 0A05..0A0A; GURMUKHI
5823 0x0A0B, // 0A0B..0A0E; UNKNOWN
5824 0x0A0F, // 0A0F..0A10; GURMUKHI
5825 0x0A11, // 0A11..0A12; UNKNOWN
5826 0x0A13, // 0A13..0A28; GURMUKHI
5827 0x0A29, // 0A29 ; UNKNOWN
5828 0x0A2A, // 0A2A..0A30; GURMUKHI
5829 0x0A31, // 0A31 ; UNKNOWN
5830 0x0A32, // 0A32..0A33; GURMUKHI
5831 0x0A34, // 0A34 ; UNKNOWN
5832 0x0A35, // 0A35..0A36; GURMUKHI
5833 0x0A37, // 0A37 ; UNKNOWN
5834 0x0A38, // 0A38..0A39; GURMUKHI
5835 0x0A3A, // 0A3A..0A3B; UNKNOWN
5836 0x0A3C, // 0A3C ; GURMUKHI
5837 0x0A3D, // 0A3D ; UNKNOWN
5838 0x0A3E, // 0A3E..0A42; GURMUKHI
5839 0x0A43, // 0A43..0A46; UNKNOWN
5840 0x0A47, // 0A47..0A48; GURMUKHI
5841 0x0A49, // 0A49..0A4A; UNKNOWN
5842 0x0A4B, // 0A4B..0A4D; GURMUKHI
5843 0x0A4E, // 0A4E..0A50; UNKNOWN
5844 0x0A51, // 0A51 ; GURMUKHI
5845 0x0A52, // 0A52..0A58; UNKNOWN
5846 0x0A59, // 0A59..0A5C; GURMUKHI
5847 0x0A5D, // 0A5D ; UNKNOWN
5848 0x0A5E, // 0A5E ; GURMUKHI
5849 0x0A5F, // 0A5F..0A65; UNKNOWN
5850 0x0A66, // 0A66..0A76; GURMUKHI
5851 0x0A77, // 0A77..0A80; UNKNOWN
5852 0x0A81, // 0A81..0A83; GUJARATI
5853 0x0A84, // 0A84 ; UNKNOWN
5854 0x0A85, // 0A85..0A8D; GUJARATI
5855 0x0A8E, // 0A8E ; UNKNOWN
5856 0x0A8F, // 0A8F..0A91; GUJARATI
5857 0x0A92, // 0A92 ; UNKNOWN
5858 0x0A93, // 0A93..0AA8; GUJARATI
5859 0x0AA9, // 0AA9 ; UNKNOWN
5860 0x0AAA, // 0AAA..0AB0; GUJARATI
5861 0x0AB1, // 0AB1 ; UNKNOWN
5862 0x0AB2, // 0AB2..0AB3; GUJARATI
5863 0x0AB4, // 0AB4 ; UNKNOWN
5864 0x0AB5, // 0AB5..0AB9; GUJARATI
5865 0x0ABA, // 0ABA..0ABB; UNKNOWN
5866 0x0ABC, // 0ABC..0AC5; GUJARATI
5867 0x0AC6, // 0AC6 ; UNKNOWN
5868 0x0AC7, // 0AC7..0AC9; GUJARATI
5869 0x0ACA, // 0ACA ; UNKNOWN
5870 0x0ACB, // 0ACB..0ACD; GUJARATI
5871 0x0ACE, // 0ACE..0ACF; UNKNOWN
5872 0x0AD0, // 0AD0 ; GUJARATI
5873 0x0AD1, // 0AD1..0ADF; UNKNOWN
5874 0x0AE0, // 0AE0..0AE3; GUJARATI
5875 0x0AE4, // 0AE4..0AE5; UNKNOWN
5876 0x0AE6, // 0AE6..0AF1; GUJARATI
5877 0x0AF2, // 0AF2..0AF8; UNKNOWN
5878 0x0AF9, // 0AF9..0AFF; GUJARATI
5879 0x0B00, // 0B00 ; UNKNOWN
5880 0x0B01, // 0B01..0B03; ORIYA
5881 0x0B04, // 0B04 ; UNKNOWN
5882 0x0B05, // 0B05..0B0C; ORIYA
5883 0x0B0D, // 0B0D..0B0E; UNKNOWN
5884 0x0B0F, // 0B0F..0B10; ORIYA
5885 0x0B11, // 0B11..0B12; UNKNOWN
5886 0x0B13, // 0B13..0B28; ORIYA
5887 0x0B29, // 0B29 ; UNKNOWN
5888 0x0B2A, // 0B2A..0B30; ORIYA
5889 0x0B31, // 0B31 ; UNKNOWN
5890 0x0B32, // 0B32..0B33; ORIYA
5891 0x0B34, // 0B34 ; UNKNOWN
5892 0x0B35, // 0B35..0B39; ORIYA
5893 0x0B3A, // 0B3A..0B3B; UNKNOWN
5894 0x0B3C, // 0B3C..0B44; ORIYA
5895 0x0B45, // 0B45..0B46; UNKNOWN
5896 0x0B47, // 0B47..0B48; ORIYA
5897 0x0B49, // 0B49..0B4A; UNKNOWN
5898 0x0B4B, // 0B4B..0B4D; ORIYA
5899 0x0B4E, // 0B4E..0B54; UNKNOWN
5900 0x0B55, // 0B55..0B57; ORIYA
5901 0x0B58, // 0B58..0B5B; UNKNOWN
5902 0x0B5C, // 0B5C..0B5D; ORIYA
5903 0x0B5E, // 0B5E ; UNKNOWN
5904 0x0B5F, // 0B5F..0B63; ORIYA
5905 0x0B64, // 0B64..0B65; UNKNOWN
5906 0x0B66, // 0B66..0B77; ORIYA
5907 0x0B78, // 0B78..0B81; UNKNOWN
5908 0x0B82, // 0B82..0B83; TAMIL
5909 0x0B84, // 0B84 ; UNKNOWN
5910 0x0B85, // 0B85..0B8A; TAMIL
5911 0x0B8B, // 0B8B..0B8D; UNKNOWN
5912 0x0B8E, // 0B8E..0B90; TAMIL
5913 0x0B91, // 0B91 ; UNKNOWN
5914 0x0B92, // 0B92..0B95; TAMIL
5915 0x0B96, // 0B96..0B98; UNKNOWN
5916 0x0B99, // 0B99..0B9A; TAMIL
5917 0x0B9B, // 0B9B ; UNKNOWN
5918 0x0B9C, // 0B9C ; TAMIL
5919 0x0B9D, // 0B9D ; UNKNOWN
5920 0x0B9E, // 0B9E..0B9F; TAMIL
5921 0x0BA0, // 0BA0..0BA2; UNKNOWN
5922 0x0BA3, // 0BA3..0BA4; TAMIL
5923 0x0BA5, // 0BA5..0BA7; UNKNOWN
5924 0x0BA8, // 0BA8..0BAA; TAMIL
5925 0x0BAB, // 0BAB..0BAD; UNKNOWN
5926 0x0BAE, // 0BAE..0BB9; TAMIL
5927 0x0BBA, // 0BBA..0BBD; UNKNOWN
5928 0x0BBE, // 0BBE..0BC2; TAMIL
5929 0x0BC3, // 0BC3..0BC5; UNKNOWN
5930 0x0BC6, // 0BC6..0BC8; TAMIL
5931 0x0BC9, // 0BC9 ; UNKNOWN
5932 0x0BCA, // 0BCA..0BCD; TAMIL
5933 0x0BCE, // 0BCE..0BCF; UNKNOWN
5934 0x0BD0, // 0BD0 ; TAMIL
5935 0x0BD1, // 0BD1..0BD6; UNKNOWN
5936 0x0BD7, // 0BD7 ; TAMIL
5937 0x0BD8, // 0BD8..0BE5; UNKNOWN
5938 0x0BE6, // 0BE6..0BFA; TAMIL
5939 0x0BFB, // 0BFB..0BFF; UNKNOWN
5940 0x0C00, // 0C00..0C0C; TELUGU
5941 0x0C0D, // 0C0D ; UNKNOWN
5942 0x0C0E, // 0C0E..0C10; TELUGU
5943 0x0C11, // 0C11 ; UNKNOWN
5944 0x0C12, // 0C12..0C28; TELUGU
5945 0x0C29, // 0C29 ; UNKNOWN
5946 0x0C2A, // 0C2A..0C39; TELUGU
5947 0x0C3A, // 0C3A..0C3B; UNKNOWN
5948 0x0C3C, // 0C3C..0C44; TELUGU
5949 0x0C45, // 0C45 ; UNKNOWN
5950 0x0C46, // 0C46..0C48; TELUGU
5951 0x0C49, // 0C49 ; UNKNOWN
5952 0x0C4A, // 0C4A..0C4D; TELUGU
5953 0x0C4E, // 0C4E..0C54; UNKNOWN
5954 0x0C55, // 0C55..0C56; TELUGU
5955 0x0C57, // 0C57 ; UNKNOWN
5956 0x0C58, // 0C58..0C5A; TELUGU
5957 0x0C5B, // 0C5B ; UNKNOWN
5958 0x0C5C, // 0C5C..0C5D; TELUGU
5959 0x0C5E, // 0C5E..0C5F; UNKNOWN
5960 0x0C60, // 0C60..0C63; TELUGU
5961 0x0C64, // 0C64..0C65; UNKNOWN
5962 0x0C66, // 0C66..0C6F; TELUGU
5963 0x0C70, // 0C70..0C76; UNKNOWN
5964 0x0C77, // 0C77..0C7F; TELUGU
5965 0x0C80, // 0C80..0C8C; KANNADA
5966 0x0C8D, // 0C8D ; UNKNOWN
5967 0x0C8E, // 0C8E..0C90; KANNADA
5968 0x0C91, // 0C91 ; UNKNOWN
5969 0x0C92, // 0C92..0CA8; KANNADA
5970 0x0CA9, // 0CA9 ; UNKNOWN
5971 0x0CAA, // 0CAA..0CB3; KANNADA
5972 0x0CB4, // 0CB4 ; UNKNOWN
5973 0x0CB5, // 0CB5..0CB9; KANNADA
5974 0x0CBA, // 0CBA..0CBB; UNKNOWN
5975 0x0CBC, // 0CBC..0CC4; KANNADA
5976 0x0CC5, // 0CC5 ; UNKNOWN
5977 0x0CC6, // 0CC6..0CC8; KANNADA
5978 0x0CC9, // 0CC9 ; UNKNOWN
5979 0x0CCA, // 0CCA..0CCD; KANNADA
5980 0x0CCE, // 0CCE..0CD4; UNKNOWN
5981 0x0CD5, // 0CD5..0CD6; KANNADA
5982 0x0CD7, // 0CD7..0CDB; UNKNOWN
5983 0x0CDC, // 0CDC..0CDE; KANNADA
5984 0x0CDF, // 0CDF ; UNKNOWN
5985 0x0CE0, // 0CE0..0CE3; KANNADA
5986 0x0CE4, // 0CE4..0CE5; UNKNOWN
5987 0x0CE6, // 0CE6..0CEF; KANNADA
5988 0x0CF0, // 0CF0 ; UNKNOWN
5989 0x0CF1, // 0CF1..0CF3; KANNADA
5990 0x0CF4, // 0CF4..0CFF; UNKNOWN
5991 0x0D00, // 0D00..0D0C; MALAYALAM
5992 0x0D0D, // 0D0D ; UNKNOWN
5993 0x0D0E, // 0D0E..0D10; MALAYALAM
5994 0x0D11, // 0D11 ; UNKNOWN
5995 0x0D12, // 0D12..0D44; MALAYALAM
5996 0x0D45, // 0D45 ; UNKNOWN
5997 0x0D46, // 0D46..0D48; MALAYALAM
5998 0x0D49, // 0D49 ; UNKNOWN
5999 0x0D4A, // 0D4A..0D4F; MALAYALAM
6000 0x0D50, // 0D50..0D53; UNKNOWN
6001 0x0D54, // 0D54..0D63; MALAYALAM
6002 0x0D64, // 0D64..0D65; UNKNOWN
6003 0x0D66, // 0D66..0D7F; MALAYALAM
6004 0x0D80, // 0D80 ; UNKNOWN
6005 0x0D81, // 0D81..0D83; SINHALA
6006 0x0D84, // 0D84 ; UNKNOWN
6007 0x0D85, // 0D85..0D96; SINHALA
6008 0x0D97, // 0D97..0D99; UNKNOWN
6009 0x0D9A, // 0D9A..0DB1; SINHALA
6010 0x0DB2, // 0DB2 ; UNKNOWN
6011 0x0DB3, // 0DB3..0DBB; SINHALA
6012 0x0DBC, // 0DBC ; UNKNOWN
6013 0x0DBD, // 0DBD ; SINHALA
6014 0x0DBE, // 0DBE..0DBF; UNKNOWN
6015 0x0DC0, // 0DC0..0DC6; SINHALA
6016 0x0DC7, // 0DC7..0DC9; UNKNOWN
6017 0x0DCA, // 0DCA ; SINHALA
6018 0x0DCB, // 0DCB..0DCE; UNKNOWN
6019 0x0DCF, // 0DCF..0DD4; SINHALA
6020 0x0DD5, // 0DD5 ; UNKNOWN
6021 0x0DD6, // 0DD6 ; SINHALA
6022 0x0DD7, // 0DD7 ; UNKNOWN
6023 0x0DD8, // 0DD8..0DDF; SINHALA
6024 0x0DE0, // 0DE0..0DE5; UNKNOWN
6025 0x0DE6, // 0DE6..0DEF; SINHALA
6026 0x0DF0, // 0DF0..0DF1; UNKNOWN
6027 0x0DF2, // 0DF2..0DF4; SINHALA
6028 0x0DF5, // 0DF5..0E00; UNKNOWN
6029 0x0E01, // 0E01..0E3A; THAI
6030 0x0E3B, // 0E3B..0E3E; UNKNOWN
6031 0x0E3F, // 0E3F ; COMMON
6032 0x0E40, // 0E40..0E5B; THAI
6033 0x0E5C, // 0E5C..0E80; UNKNOWN
6034 0x0E81, // 0E81..0E82; LAO
6035 0x0E83, // 0E83 ; UNKNOWN
6036 0x0E84, // 0E84 ; LAO
6037 0x0E85, // 0E85 ; UNKNOWN
6038 0x0E86, // 0E86..0E8A; LAO
6039 0x0E8B, // 0E8B ; UNKNOWN
6040 0x0E8C, // 0E8C..0EA3; LAO
6041 0x0EA4, // 0EA4 ; UNKNOWN
6042 0x0EA5, // 0EA5 ; LAO
6043 0x0EA6, // 0EA6 ; UNKNOWN
6044 0x0EA7, // 0EA7..0EBD; LAO
6045 0x0EBE, // 0EBE..0EBF; UNKNOWN
6046 0x0EC0, // 0EC0..0EC4; LAO
6047 0x0EC5, // 0EC5 ; UNKNOWN
6048 0x0EC6, // 0EC6 ; LAO
6049 0x0EC7, // 0EC7 ; UNKNOWN
6050 0x0EC8, // 0EC8..0ECE; LAO
6051 0x0ECF, // 0ECF ; UNKNOWN
6052 0x0ED0, // 0ED0..0ED9; LAO
6053 0x0EDA, // 0EDA..0EDB; UNKNOWN
6054 0x0EDC, // 0EDC..0EDF; LAO
6055 0x0EE0, // 0EE0..0EFF; UNKNOWN
6056 0x0F00, // 0F00..0F47; TIBETAN
6057 0x0F48, // 0F48 ; UNKNOWN
6058 0x0F49, // 0F49..0F6C; TIBETAN
6059 0x0F6D, // 0F6D..0F70; UNKNOWN
6060 0x0F71, // 0F71..0F97; TIBETAN
6061 0x0F98, // 0F98 ; UNKNOWN
6062 0x0F99, // 0F99..0FBC; TIBETAN
6063 0x0FBD, // 0FBD ; UNKNOWN
6064 0x0FBE, // 0FBE..0FCC; TIBETAN
6065 0x0FCD, // 0FCD ; UNKNOWN
6066 0x0FCE, // 0FCE..0FD4; TIBETAN
6067 0x0FD5, // 0FD5..0FD8; COMMON
6068 0x0FD9, // 0FD9..0FDA; TIBETAN
6069 0x0FDB, // 0FDB..0FFF; UNKNOWN
6070 0x1000, // 1000..109F; MYANMAR
6071 0x10A0, // 10A0..10C5; GEORGIAN
6072 0x10C6, // 10C6 ; UNKNOWN
6073 0x10C7, // 10C7 ; GEORGIAN
6074 0x10C8, // 10C8..10CC; UNKNOWN
6075 0x10CD, // 10CD ; GEORGIAN
6076 0x10CE, // 10CE..10CF; UNKNOWN
6077 0x10D0, // 10D0..10FA; GEORGIAN
6078 0x10FB, // 10FB ; COMMON
6079 0x10FC, // 10FC..10FF; GEORGIAN
6080 0x1100, // 1100..11FF; HANGUL
6081 0x1200, // 1200..1248; ETHIOPIC
6082 0x1249, // 1249 ; UNKNOWN
6083 0x124A, // 124A..124D; ETHIOPIC
6084 0x124E, // 124E..124F; UNKNOWN
6085 0x1250, // 1250..1256; ETHIOPIC
6086 0x1257, // 1257 ; UNKNOWN
6087 0x1258, // 1258 ; ETHIOPIC
6088 0x1259, // 1259 ; UNKNOWN
6089 0x125A, // 125A..125D; ETHIOPIC
6090 0x125E, // 125E..125F; UNKNOWN
6091 0x1260, // 1260..1288; ETHIOPIC
6092 0x1289, // 1289 ; UNKNOWN
6093 0x128A, // 128A..128D; ETHIOPIC
6094 0x128E, // 128E..128F; UNKNOWN
6095 0x1290, // 1290..12B0; ETHIOPIC
6096 0x12B1, // 12B1 ; UNKNOWN
6097 0x12B2, // 12B2..12B5; ETHIOPIC
6098 0x12B6, // 12B6..12B7; UNKNOWN
6099 0x12B8, // 12B8..12BE; ETHIOPIC
6100 0x12BF, // 12BF ; UNKNOWN
6101 0x12C0, // 12C0 ; ETHIOPIC
6102 0x12C1, // 12C1 ; UNKNOWN
6103 0x12C2, // 12C2..12C5; ETHIOPIC
6104 0x12C6, // 12C6..12C7; UNKNOWN
6105 0x12C8, // 12C8..12D6; ETHIOPIC
6106 0x12D7, // 12D7 ; UNKNOWN
6107 0x12D8, // 12D8..1310; ETHIOPIC
6108 0x1311, // 1311 ; UNKNOWN
6109 0x1312, // 1312..1315; ETHIOPIC
6110 0x1316, // 1316..1317; UNKNOWN
6111 0x1318, // 1318..135A; ETHIOPIC
6112 0x135B, // 135B..135C; UNKNOWN
6113 0x135D, // 135D..137C; ETHIOPIC
6114 0x137D, // 137D..137F; UNKNOWN
6115 0x1380, // 1380..1399; ETHIOPIC
6116 0x139A, // 139A..139F; UNKNOWN
6117 0x13A0, // 13A0..13F5; CHEROKEE
6118 0x13F6, // 13F6..13F7; UNKNOWN
6119 0x13F8, // 13F8..13FD; CHEROKEE
6120 0x13FE, // 13FE..13FF; UNKNOWN
6121 0x1400, // 1400..167F; CANADIAN_ABORIGINAL
6122 0x1680, // 1680..169C; OGHAM
6123 0x169D, // 169D..169F; UNKNOWN
6124 0x16A0, // 16A0..16EA; RUNIC
6125 0x16EB, // 16EB..16ED; COMMON
6126 0x16EE, // 16EE..16F8; RUNIC
6127 0x16F9, // 16F9..16FF; UNKNOWN
6128 0x1700, // 1700..1715; TAGALOG
6129 0x1716, // 1716..171E; UNKNOWN
6130 0x171F, // 171F ; TAGALOG
6131 0x1720, // 1720..1734; HANUNOO
6132 0x1735, // 1735..1736; COMMON
6133 0x1737, // 1737..173F; UNKNOWN
6134 0x1740, // 1740..1753; BUHID
6135 0x1754, // 1754..175F; UNKNOWN
6136 0x1760, // 1760..176C; TAGBANWA
6137 0x176D, // 176D ; UNKNOWN
6138 0x176E, // 176E..1770; TAGBANWA
6139 0x1771, // 1771 ; UNKNOWN
6140 0x1772, // 1772..1773; TAGBANWA
6141 0x1774, // 1774..177F; UNKNOWN
6142 0x1780, // 1780..17DD; KHMER
6143 0x17DE, // 17DE..17DF; UNKNOWN
6144 0x17E0, // 17E0..17E9; KHMER
6145 0x17EA, // 17EA..17EF; UNKNOWN
6146 0x17F0, // 17F0..17F9; KHMER
6147 0x17FA, // 17FA..17FF; UNKNOWN
6148 0x1800, // 1800..1801; MONGOLIAN
6149 0x1802, // 1802..1803; COMMON
6150 0x1804, // 1804 ; MONGOLIAN
6151 0x1805, // 1805 ; COMMON
6152 0x1806, // 1806..1819; MONGOLIAN
6153 0x181A, // 181A..181F; UNKNOWN
6154 0x1820, // 1820..1878; MONGOLIAN
6155 0x1879, // 1879..187F; UNKNOWN
6156 0x1880, // 1880..18AA; MONGOLIAN
6157 0x18AB, // 18AB..18AF; UNKNOWN
6158 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL
6159 0x18F6, // 18F6..18FF; UNKNOWN
6160 0x1900, // 1900..191E; LIMBU
6161 0x191F, // 191F ; UNKNOWN
6162 0x1920, // 1920..192B; LIMBU
6163 0x192C, // 192C..192F; UNKNOWN
6164 0x1930, // 1930..193B; LIMBU
6165 0x193C, // 193C..193F; UNKNOWN
6166 0x1940, // 1940 ; LIMBU
6167 0x1941, // 1941..1943; UNKNOWN
6168 0x1944, // 1944..194F; LIMBU
6169 0x1950, // 1950..196D; TAI_LE
6170 0x196E, // 196E..196F; UNKNOWN
6171 0x1970, // 1970..1974; TAI_LE
6172 0x1975, // 1975..197F; UNKNOWN
6173 0x1980, // 1980..19AB; NEW_TAI_LUE
6174 0x19AC, // 19AC..19AF; UNKNOWN
6175 0x19B0, // 19B0..19C9; NEW_TAI_LUE
6176 0x19CA, // 19CA..19CF; UNKNOWN
6177 0x19D0, // 19D0..19DA; NEW_TAI_LUE
6178 0x19DB, // 19DB..19DD; UNKNOWN
6179 0x19DE, // 19DE..19DF; NEW_TAI_LUE
6180 0x19E0, // 19E0..19FF; KHMER
6181 0x1A00, // 1A00..1A1B; BUGINESE
6182 0x1A1C, // 1A1C..1A1D; UNKNOWN
6183 0x1A1E, // 1A1E..1A1F; BUGINESE
6184 0x1A20, // 1A20..1A5E; TAI_THAM
6185 0x1A5F, // 1A5F ; UNKNOWN
6186 0x1A60, // 1A60..1A7C; TAI_THAM
6187 0x1A7D, // 1A7D..1A7E; UNKNOWN
6188 0x1A7F, // 1A7F..1A89; TAI_THAM
6189 0x1A8A, // 1A8A..1A8F; UNKNOWN
6190 0x1A90, // 1A90..1A99; TAI_THAM
6191 0x1A9A, // 1A9A..1A9F; UNKNOWN
6192 0x1AA0, // 1AA0..1AAD; TAI_THAM
6193 0x1AAE, // 1AAE..1AAF; UNKNOWN
6194 0x1AB0, // 1AB0..1ADD; INHERITED
6195 0x1ADE, // 1ADE..1ADF; UNKNOWN
6196 0x1AE0, // 1AE0..1AEB; INHERITED
6197 0x1AEC, // 1AEC..1AFF; UNKNOWN
6198 0x1B00, // 1B00..1B4C; BALINESE
6199 0x1B4D, // 1B4D ; UNKNOWN
6200 0x1B4E, // 1B4E..1B7F; BALINESE
6201 0x1B80, // 1B80..1BBF; SUNDANESE
6202 0x1BC0, // 1BC0..1BF3; BATAK
6203 0x1BF4, // 1BF4..1BFB; UNKNOWN
6204 0x1BFC, // 1BFC..1BFF; BATAK
6205 0x1C00, // 1C00..1C37; LEPCHA
6206 0x1C38, // 1C38..1C3A; UNKNOWN
6207 0x1C3B, // 1C3B..1C49; LEPCHA
6208 0x1C4A, // 1C4A..1C4C; UNKNOWN
6209 0x1C4D, // 1C4D..1C4F; LEPCHA
6210 0x1C50, // 1C50..1C7F; OL_CHIKI
6211 0x1C80, // 1C80..1C8A; CYRILLIC
6212 0x1C8B, // 1C8B..1C8F; UNKNOWN
6213 0x1C90, // 1C90..1CBA; GEORGIAN
6214 0x1CBB, // 1CBB..1CBC; UNKNOWN
6215 0x1CBD, // 1CBD..1CBF; GEORGIAN
6216 0x1CC0, // 1CC0..1CC7; SUNDANESE
6217 0x1CC8, // 1CC8..1CCF; UNKNOWN
6218 0x1CD0, // 1CD0..1CD2; INHERITED
6219 0x1CD3, // 1CD3 ; COMMON
6220 0x1CD4, // 1CD4..1CE0; INHERITED
6221 0x1CE1, // 1CE1 ; COMMON
6222 0x1CE2, // 1CE2..1CE8; INHERITED
6223 0x1CE9, // 1CE9..1CEC; COMMON
6224 0x1CED, // 1CED ; INHERITED
6225 0x1CEE, // 1CEE..1CF3; COMMON
6226 0x1CF4, // 1CF4 ; INHERITED
6227 0x1CF5, // 1CF5..1CF7; COMMON
6228 0x1CF8, // 1CF8..1CF9; INHERITED
6229 0x1CFA, // 1CFA ; COMMON
6230 0x1CFB, // 1CFB..1CFF; UNKNOWN
6231 0x1D00, // 1D00..1D25; LATIN
6232 0x1D26, // 1D26..1D2A; GREEK
6233 0x1D2B, // 1D2B ; CYRILLIC
6234 0x1D2C, // 1D2C..1D5C; LATIN
6235 0x1D5D, // 1D5D..1D61; GREEK
6236 0x1D62, // 1D62..1D65; LATIN
6237 0x1D66, // 1D66..1D6A; GREEK
6238 0x1D6B, // 1D6B..1D77; LATIN
6239 0x1D78, // 1D78 ; CYRILLIC
6240 0x1D79, // 1D79..1DBE; LATIN
6241 0x1DBF, // 1DBF ; GREEK
6242 0x1DC0, // 1DC0..1DFF; INHERITED
6243 0x1E00, // 1E00..1EFF; LATIN
6244 0x1F00, // 1F00..1F15; GREEK
6245 0x1F16, // 1F16..1F17; UNKNOWN
6246 0x1F18, // 1F18..1F1D; GREEK
6247 0x1F1E, // 1F1E..1F1F; UNKNOWN
6248 0x1F20, // 1F20..1F45; GREEK
6249 0x1F46, // 1F46..1F47; UNKNOWN
6250 0x1F48, // 1F48..1F4D; GREEK
6251 0x1F4E, // 1F4E..1F4F; UNKNOWN
6252 0x1F50, // 1F50..1F57; GREEK
6253 0x1F58, // 1F58 ; UNKNOWN
6254 0x1F59, // 1F59 ; GREEK
6255 0x1F5A, // 1F5A ; UNKNOWN
6256 0x1F5B, // 1F5B ; GREEK
6257 0x1F5C, // 1F5C ; UNKNOWN
6258 0x1F5D, // 1F5D ; GREEK
6259 0x1F5E, // 1F5E ; UNKNOWN
6260 0x1F5F, // 1F5F..1F7D; GREEK
6261 0x1F7E, // 1F7E..1F7F; UNKNOWN
6262 0x1F80, // 1F80..1FB4; GREEK
6263 0x1FB5, // 1FB5 ; UNKNOWN
6264 0x1FB6, // 1FB6..1FC4; GREEK
6265 0x1FC5, // 1FC5 ; UNKNOWN
6266 0x1FC6, // 1FC6..1FD3; GREEK
6267 0x1FD4, // 1FD4..1FD5; UNKNOWN
6268 0x1FD6, // 1FD6..1FDB; GREEK
6269 0x1FDC, // 1FDC ; UNKNOWN
6270 0x1FDD, // 1FDD..1FEF; GREEK
6271 0x1FF0, // 1FF0..1FF1; UNKNOWN
6272 0x1FF2, // 1FF2..1FF4; GREEK
6273 0x1FF5, // 1FF5 ; UNKNOWN
6274 0x1FF6, // 1FF6..1FFE; GREEK
6275 0x1FFF, // 1FFF ; UNKNOWN
6276 0x2000, // 2000..200B; COMMON
6277 0x200C, // 200C..200D; INHERITED
6278 0x200E, // 200E..2064; COMMON
6279 0x2065, // 2065 ; UNKNOWN
6280 0x2066, // 2066..2070; COMMON
6281 0x2071, // 2071 ; LATIN
6282 0x2072, // 2072..2073; UNKNOWN
6283 0x2074, // 2074..207E; COMMON
6284 0x207F, // 207F ; LATIN
6285 0x2080, // 2080..208E; COMMON
6286 0x208F, // 208F ; UNKNOWN
6287 0x2090, // 2090..209C; LATIN
6288 0x209D, // 209D..209F; UNKNOWN
6289 0x20A0, // 20A0..20C1; COMMON
6290 0x20C2, // 20C2..20CF; UNKNOWN
6291 0x20D0, // 20D0..20F0; INHERITED
6292 0x20F1, // 20F1..20FF; UNKNOWN
6293 0x2100, // 2100..2125; COMMON
6294 0x2126, // 2126 ; GREEK
6295 0x2127, // 2127..2129; COMMON
6296 0x212A, // 212A..212B; LATIN
6297 0x212C, // 212C..2131; COMMON
6298 0x2132, // 2132 ; LATIN
6299 0x2133, // 2133..214D; COMMON
6300 0x214E, // 214E ; LATIN
6301 0x214F, // 214F..215F; COMMON
6302 0x2160, // 2160..2188; LATIN
6303 0x2189, // 2189..218B; COMMON
6304 0x218C, // 218C..218F; UNKNOWN
6305 0x2190, // 2190..2429; COMMON
6306 0x242A, // 242A..243F; UNKNOWN
6307 0x2440, // 2440..244A; COMMON
6308 0x244B, // 244B..245F; UNKNOWN
6309 0x2460, // 2460..27FF; COMMON
6310 0x2800, // 2800..28FF; BRAILLE
6311 0x2900, // 2900..2B73; COMMON
6312 0x2B74, // 2B74..2B75; UNKNOWN
6313 0x2B76, // 2B76..2BFF; COMMON
6314 0x2C00, // 2C00..2C5F; GLAGOLITIC
6315 0x2C60, // 2C60..2C7F; LATIN
6316 0x2C80, // 2C80..2CF3; COPTIC
6317 0x2CF4, // 2CF4..2CF8; UNKNOWN
6318 0x2CF9, // 2CF9..2CFF; COPTIC
6319 0x2D00, // 2D00..2D25; GEORGIAN
6320 0x2D26, // 2D26 ; UNKNOWN
6321 0x2D27, // 2D27 ; GEORGIAN
6322 0x2D28, // 2D28..2D2C; UNKNOWN
6323 0x2D2D, // 2D2D ; GEORGIAN
6324 0x2D2E, // 2D2E..2D2F; UNKNOWN
6325 0x2D30, // 2D30..2D67; TIFINAGH
6326 0x2D68, // 2D68..2D6E; UNKNOWN
6327 0x2D6F, // 2D6F..2D70; TIFINAGH
6328 0x2D71, // 2D71..2D7E; UNKNOWN
6329 0x2D7F, // 2D7F ; TIFINAGH
6330 0x2D80, // 2D80..2D96; ETHIOPIC
6331 0x2D97, // 2D97..2D9F; UNKNOWN
6332 0x2DA0, // 2DA0..2DA6; ETHIOPIC
6333 0x2DA7, // 2DA7 ; UNKNOWN
6334 0x2DA8, // 2DA8..2DAE; ETHIOPIC
6335 0x2DAF, // 2DAF ; UNKNOWN
6336 0x2DB0, // 2DB0..2DB6; ETHIOPIC
6337 0x2DB7, // 2DB7 ; UNKNOWN
6338 0x2DB8, // 2DB8..2DBE; ETHIOPIC
6339 0x2DBF, // 2DBF ; UNKNOWN
6340 0x2DC0, // 2DC0..2DC6; ETHIOPIC
6341 0x2DC7, // 2DC7 ; UNKNOWN
6342 0x2DC8, // 2DC8..2DCE; ETHIOPIC
6343 0x2DCF, // 2DCF ; UNKNOWN
6344 0x2DD0, // 2DD0..2DD6; ETHIOPIC
6345 0x2DD7, // 2DD7 ; UNKNOWN
6346 0x2DD8, // 2DD8..2DDE; ETHIOPIC
6347 0x2DDF, // 2DDF ; UNKNOWN
6348 0x2DE0, // 2DE0..2DFF; CYRILLIC
6349 0x2E00, // 2E00..2E5D; COMMON
6350 0x2E5E, // 2E5E..2E7F; UNKNOWN
6351 0x2E80, // 2E80..2E99; HAN
6352 0x2E9A, // 2E9A ; UNKNOWN
6353 0x2E9B, // 2E9B..2EF3; HAN
6354 0x2EF4, // 2EF4..2EFF; UNKNOWN
6355 0x2F00, // 2F00..2FD5; HAN
6356 0x2FD6, // 2FD6..2FEF; UNKNOWN
6357 0x2FF0, // 2FF0..3004; COMMON
6358 0x3005, // 3005 ; HAN
6359 0x3006, // 3006 ; COMMON
6360 0x3007, // 3007 ; HAN
6361 0x3008, // 3008..3020; COMMON
6362 0x3021, // 3021..3029; HAN
6363 0x302A, // 302A..302D; INHERITED
6364 0x302E, // 302E..302F; HANGUL
6365 0x3030, // 3030..3037; COMMON
6366 0x3038, // 3038..303B; HAN
6367 0x303C, // 303C..303F; COMMON
6368 0x3040, // 3040 ; UNKNOWN
6369 0x3041, // 3041..3096; HIRAGANA
6370 0x3097, // 3097..3098; UNKNOWN
6371 0x3099, // 3099..309A; INHERITED
6372 0x309B, // 309B..309C; COMMON
6373 0x309D, // 309D..309F; HIRAGANA
6374 0x30A0, // 30A0 ; COMMON
6375 0x30A1, // 30A1..30FA; KATAKANA
6376 0x30FB, // 30FB..30FC; COMMON
6377 0x30FD, // 30FD..30FF; KATAKANA
6378 0x3100, // 3100..3104; UNKNOWN
6379 0x3105, // 3105..312F; BOPOMOFO
6380 0x3130, // 3130 ; UNKNOWN
6381 0x3131, // 3131..318E; HANGUL
6382 0x318F, // 318F ; UNKNOWN
6383 0x3190, // 3190..319F; COMMON
6384 0x31A0, // 31A0..31BF; BOPOMOFO
6385 0x31C0, // 31C0..31E5; COMMON
6386 0x31E6, // 31E6..31EE; UNKNOWN
6387 0x31EF, // 31EF ; COMMON
6388 0x31F0, // 31F0..31FF; KATAKANA
6389 0x3200, // 3200..321E; HANGUL
6390 0x321F, // 321F ; UNKNOWN
6391 0x3220, // 3220..325F; COMMON
6392 0x3260, // 3260..327E; HANGUL
6393 0x327F, // 327F..32CF; COMMON
6394 0x32D0, // 32D0..32FE; KATAKANA
6395 0x32FF, // 32FF ; COMMON
6396 0x3300, // 3300..3357; KATAKANA
6397 0x3358, // 3358..33FF; COMMON
6398 0x3400, // 3400..4DBF; HAN
6399 0x4DC0, // 4DC0..4DFF; COMMON
6400 0x4E00, // 4E00..9FFF; HAN
6401 0xA000, // A000..A48C; YI
6402 0xA48D, // A48D..A48F; UNKNOWN
6403 0xA490, // A490..A4C6; YI
6404 0xA4C7, // A4C7..A4CF; UNKNOWN
6405 0xA4D0, // A4D0..A4FF; LISU
6406 0xA500, // A500..A62B; VAI
6407 0xA62C, // A62C..A63F; UNKNOWN
6408 0xA640, // A640..A69F; CYRILLIC
6409 0xA6A0, // A6A0..A6F7; BAMUM
6410 0xA6F8, // A6F8..A6FF; UNKNOWN
6411 0xA700, // A700..A721; COMMON
6412 0xA722, // A722..A787; LATIN
6413 0xA788, // A788..A78A; COMMON
6414 0xA78B, // A78B..A7DC; LATIN
6415 0xA7DD, // A7DD..A7F0; UNKNOWN
6416 0xA7F1, // A7F1..A7FF; LATIN
6417 0xA800, // A800..A82C; SYLOTI_NAGRI
6418 0xA82D, // A82D..A82F; UNKNOWN
6419 0xA830, // A830..A839; COMMON
6420 0xA83A, // A83A..A83F; UNKNOWN
6421 0xA840, // A840..A877; PHAGS_PA
6422 0xA878, // A878..A87F; UNKNOWN
6423 0xA880, // A880..A8C5; SAURASHTRA
6424 0xA8C6, // A8C6..A8CD; UNKNOWN
6425 0xA8CE, // A8CE..A8D9; SAURASHTRA
6426 0xA8DA, // A8DA..A8DF; UNKNOWN
6427 0xA8E0, // A8E0..A8FF; DEVANAGARI
6428 0xA900, // A900..A92D; KAYAH_LI
6429 0xA92E, // A92E ; COMMON
6430 0xA92F, // A92F ; KAYAH_LI
6431 0xA930, // A930..A953; REJANG
6432 0xA954, // A954..A95E; UNKNOWN
6433 0xA95F, // A95F ; REJANG
6434 0xA960, // A960..A97C; HANGUL
6435 0xA97D, // A97D..A97F; UNKNOWN
6436 0xA980, // A980..A9CD; JAVANESE
6437 0xA9CE, // A9CE ; UNKNOWN
6438 0xA9CF, // A9CF ; COMMON
6439 0xA9D0, // A9D0..A9D9; JAVANESE
6440 0xA9DA, // A9DA..A9DD; UNKNOWN
6441 0xA9DE, // A9DE..A9DF; JAVANESE
6442 0xA9E0, // A9E0..A9FE; MYANMAR
6443 0xA9FF, // A9FF ; UNKNOWN
6444 0xAA00, // AA00..AA36; CHAM
6445 0xAA37, // AA37..AA3F; UNKNOWN
6446 0xAA40, // AA40..AA4D; CHAM
6447 0xAA4E, // AA4E..AA4F; UNKNOWN
6448 0xAA50, // AA50..AA59; CHAM
6449 0xAA5A, // AA5A..AA5B; UNKNOWN
6450 0xAA5C, // AA5C..AA5F; CHAM
6451 0xAA60, // AA60..AA7F; MYANMAR
6452 0xAA80, // AA80..AAC2; TAI_VIET
6453 0xAAC3, // AAC3..AADA; UNKNOWN
6454 0xAADB, // AADB..AADF; TAI_VIET
6455 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK
6456 0xAAF7, // AAF7..AB00; UNKNOWN
6457 0xAB01, // AB01..AB06; ETHIOPIC
6458 0xAB07, // AB07..AB08; UNKNOWN
6459 0xAB09, // AB09..AB0E; ETHIOPIC
6460 0xAB0F, // AB0F..AB10; UNKNOWN
6461 0xAB11, // AB11..AB16; ETHIOPIC
6462 0xAB17, // AB17..AB1F; UNKNOWN
6463 0xAB20, // AB20..AB26; ETHIOPIC
6464 0xAB27, // AB27 ; UNKNOWN
6465 0xAB28, // AB28..AB2E; ETHIOPIC
6466 0xAB2F, // AB2F ; UNKNOWN
6467 0xAB30, // AB30..AB5A; LATIN
6468 0xAB5B, // AB5B ; COMMON
6469 0xAB5C, // AB5C..AB64; LATIN
6470 0xAB65, // AB65 ; GREEK
6471 0xAB66, // AB66..AB69; LATIN
6472 0xAB6A, // AB6A..AB6B; COMMON
6473 0xAB6C, // AB6C..AB6F; UNKNOWN
6474 0xAB70, // AB70..ABBF; CHEROKEE
6475 0xABC0, // ABC0..ABED; MEETEI_MAYEK
6476 0xABEE, // ABEE..ABEF; UNKNOWN
6477 0xABF0, // ABF0..ABF9; MEETEI_MAYEK
6478 0xABFA, // ABFA..ABFF; UNKNOWN
6479 0xAC00, // AC00..D7A3; HANGUL
6480 0xD7A4, // D7A4..D7AF; UNKNOWN
6481 0xD7B0, // D7B0..D7C6; HANGUL
6482 0xD7C7, // D7C7..D7CA; UNKNOWN
6483 0xD7CB, // D7CB..D7FB; HANGUL
6484 0xD7FC, // D7FC..F8FF; UNKNOWN
6485 0xF900, // F900..FA6D; HAN
6486 0xFA6E, // FA6E..FA6F; UNKNOWN
6487 0xFA70, // FA70..FAD9; HAN
6488 0xFADA, // FADA..FAFF; UNKNOWN
6489 0xFB00, // FB00..FB06; LATIN
6490 0xFB07, // FB07..FB12; UNKNOWN
6491 0xFB13, // FB13..FB17; ARMENIAN
6492 0xFB18, // FB18..FB1C; UNKNOWN
6493 0xFB1D, // FB1D..FB36; HEBREW
6494 0xFB37, // FB37 ; UNKNOWN
6495 0xFB38, // FB38..FB3C; HEBREW
6496 0xFB3D, // FB3D ; UNKNOWN
6497 0xFB3E, // FB3E ; HEBREW
6498 0xFB3F, // FB3F ; UNKNOWN
6499 0xFB40, // FB40..FB41; HEBREW
6500 0xFB42, // FB42 ; UNKNOWN
6501 0xFB43, // FB43..FB44; HEBREW
6502 0xFB45, // FB45 ; UNKNOWN
6503 0xFB46, // FB46..FB4F; HEBREW
6504 0xFB50, // FB50..FD3D; ARABIC
6505 0xFD3E, // FD3E..FD3F; COMMON
6506 0xFD40, // FD40..FDCF; ARABIC
6507 0xFDD0, // FDD0..FDEF; UNKNOWN
6508 0xFDF0, // FDF0..FDFF; ARABIC
6509 0xFE00, // FE00..FE0F; INHERITED
6510 0xFE10, // FE10..FE19; COMMON
6511 0xFE1A, // FE1A..FE1F; UNKNOWN
6512 0xFE20, // FE20..FE2D; INHERITED
6513 0xFE2E, // FE2E..FE2F; CYRILLIC
6514 0xFE30, // FE30..FE52; COMMON
6515 0xFE53, // FE53 ; UNKNOWN
6516 0xFE54, // FE54..FE66; COMMON
6517 0xFE67, // FE67 ; UNKNOWN
6518 0xFE68, // FE68..FE6B; COMMON
6519 0xFE6C, // FE6C..FE6F; UNKNOWN
6520 0xFE70, // FE70..FE74; ARABIC
6521 0xFE75, // FE75 ; UNKNOWN
6522 0xFE76, // FE76..FEFC; ARABIC
6523 0xFEFD, // FEFD..FEFE; UNKNOWN
6524 0xFEFF, // FEFF ; COMMON
6525 0xFF00, // FF00 ; UNKNOWN
6526 0xFF01, // FF01..FF20; COMMON
6527 0xFF21, // FF21..FF3A; LATIN
6528 0xFF3B, // FF3B..FF40; COMMON
6529 0xFF41, // FF41..FF5A; LATIN
6530 0xFF5B, // FF5B..FF65; COMMON
6531 0xFF66, // FF66..FF6F; KATAKANA
6532 0xFF70, // FF70 ; COMMON
6533 0xFF71, // FF71..FF9D; KATAKANA
6534 0xFF9E, // FF9E..FF9F; COMMON
6535 0xFFA0, // FFA0..FFBE; HANGUL
6536 0xFFBF, // FFBF..FFC1; UNKNOWN
6537 0xFFC2, // FFC2..FFC7; HANGUL
6538 0xFFC8, // FFC8..FFC9; UNKNOWN
6539 0xFFCA, // FFCA..FFCF; HANGUL
6540 0xFFD0, // FFD0..FFD1; UNKNOWN
6541 0xFFD2, // FFD2..FFD7; HANGUL
6542 0xFFD8, // FFD8..FFD9; UNKNOWN
6543 0xFFDA, // FFDA..FFDC; HANGUL
6544 0xFFDD, // FFDD..FFDF; UNKNOWN
6545 0xFFE0, // FFE0..FFE6; COMMON
6546 0xFFE7, // FFE7 ; UNKNOWN
6547 0xFFE8, // FFE8..FFEE; COMMON
6548 0xFFEF, // FFEF..FFF8; UNKNOWN
6549 0xFFF9, // FFF9..FFFD; COMMON
6550 0xFFFE, // FFFE..FFFF; UNKNOWN
6551 0x10000, // 10000..1000B; LINEAR_B
6552 0x1000C, // 1000C ; UNKNOWN
6553 0x1000D, // 1000D..10026; LINEAR_B
6554 0x10027, // 10027 ; UNKNOWN
6555 0x10028, // 10028..1003A; LINEAR_B
6556 0x1003B, // 1003B ; UNKNOWN
6557 0x1003C, // 1003C..1003D; LINEAR_B
6558 0x1003E, // 1003E ; UNKNOWN
6559 0x1003F, // 1003F..1004D; LINEAR_B
6560 0x1004E, // 1004E..1004F; UNKNOWN
6561 0x10050, // 10050..1005D; LINEAR_B
6562 0x1005E, // 1005E..1007F; UNKNOWN
6563 0x10080, // 10080..100FA; LINEAR_B
6564 0x100FB, // 100FB..100FF; UNKNOWN
6565 0x10100, // 10100..10102; COMMON
6566 0x10103, // 10103..10106; UNKNOWN
6567 0x10107, // 10107..10133; COMMON
6568 0x10134, // 10134..10136; UNKNOWN
6569 0x10137, // 10137..1013F; COMMON
6570 0x10140, // 10140..1018E; GREEK
6571 0x1018F, // 1018F ; UNKNOWN
6572 0x10190, // 10190..1019C; COMMON
6573 0x1019D, // 1019D..1019F; UNKNOWN
6574 0x101A0, // 101A0 ; GREEK
6575 0x101A1, // 101A1..101CF; UNKNOWN
6576 0x101D0, // 101D0..101FC; COMMON
6577 0x101FD, // 101FD ; INHERITED
6578 0x101FE, // 101FE..1027F; UNKNOWN
6579 0x10280, // 10280..1029C; LYCIAN
6580 0x1029D, // 1029D..1029F; UNKNOWN
6581 0x102A0, // 102A0..102D0; CARIAN
6582 0x102D1, // 102D1..102DF; UNKNOWN
6583 0x102E0, // 102E0 ; INHERITED
6584 0x102E1, // 102E1..102FB; COMMON
6585 0x102FC, // 102FC..102FF; UNKNOWN
6586 0x10300, // 10300..10323; OLD_ITALIC
6587 0x10324, // 10324..1032C; UNKNOWN
6588 0x1032D, // 1032D..1032F; OLD_ITALIC
6589 0x10330, // 10330..1034A; GOTHIC
6590 0x1034B, // 1034B..1034F; UNKNOWN
6591 0x10350, // 10350..1037A; OLD_PERMIC
6592 0x1037B, // 1037B..1037F; UNKNOWN
6593 0x10380, // 10380..1039D; UGARITIC
6594 0x1039E, // 1039E ; UNKNOWN
6595 0x1039F, // 1039F ; UGARITIC
6596 0x103A0, // 103A0..103C3; OLD_PERSIAN
6597 0x103C4, // 103C4..103C7; UNKNOWN
6598 0x103C8, // 103C8..103D5; OLD_PERSIAN
6599 0x103D6, // 103D6..103FF; UNKNOWN
6600 0x10400, // 10400..1044F; DESERET
6601 0x10450, // 10450..1047F; SHAVIAN
6602 0x10480, // 10480..1049D; OSMANYA
6603 0x1049E, // 1049E..1049F; UNKNOWN
6604 0x104A0, // 104A0..104A9; OSMANYA
6605 0x104AA, // 104AA..104AF; UNKNOWN
6606 0x104B0, // 104B0..104D3; OSAGE
6607 0x104D4, // 104D4..104D7; UNKNOWN
6608 0x104D8, // 104D8..104FB; OSAGE
6609 0x104FC, // 104FC..104FF; UNKNOWN
6610 0x10500, // 10500..10527; ELBASAN
6611 0x10528, // 10528..1052F; UNKNOWN
6612 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN
6613 0x10564, // 10564..1056E; UNKNOWN
6614 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN
6615 0x10570, // 10570..1057A; VITHKUQI
6616 0x1057B, // 1057B ; UNKNOWN
6617 0x1057C, // 1057C..1058A; VITHKUQI
6618 0x1058B, // 1058B ; UNKNOWN
6619 0x1058C, // 1058C..10592; VITHKUQI
6620 0x10593, // 10593 ; UNKNOWN
6621 0x10594, // 10594..10595; VITHKUQI
6622 0x10596, // 10596 ; UNKNOWN
6623 0x10597, // 10597..105A1; VITHKUQI
6624 0x105A2, // 105A2 ; UNKNOWN
6625 0x105A3, // 105A3..105B1; VITHKUQI
6626 0x105B2, // 105B2 ; UNKNOWN
6627 0x105B3, // 105B3..105B9; VITHKUQI
6628 0x105BA, // 105BA ; UNKNOWN
6629 0x105BB, // 105BB..105BC; VITHKUQI
6630 0x105BD, // 105BD..105BF; UNKNOWN
6631 0x105C0, // 105C0..105F3; TODHRI
6632 0x105F4, // 105F4..105FF; UNKNOWN
6633 0x10600, // 10600..10736; LINEAR_A
6634 0x10737, // 10737..1073F; UNKNOWN
6635 0x10740, // 10740..10755; LINEAR_A
6636 0x10756, // 10756..1075F; UNKNOWN
6637 0x10760, // 10760..10767; LINEAR_A
6638 0x10768, // 10768..1077F; UNKNOWN
6639 0x10780, // 10780..10785; LATIN
6640 0x10786, // 10786 ; UNKNOWN
6641 0x10787, // 10787..107B0; LATIN
6642 0x107B1, // 107B1 ; UNKNOWN
6643 0x107B2, // 107B2..107BA; LATIN
6644 0x107BB, // 107BB..107FF; UNKNOWN
6645 0x10800, // 10800..10805; CYPRIOT
6646 0x10806, // 10806..10807; UNKNOWN
6647 0x10808, // 10808 ; CYPRIOT
6648 0x10809, // 10809 ; UNKNOWN
6649 0x1080A, // 1080A..10835; CYPRIOT
6650 0x10836, // 10836 ; UNKNOWN
6651 0x10837, // 10837..10838; CYPRIOT
6652 0x10839, // 10839..1083B; UNKNOWN
6653 0x1083C, // 1083C ; CYPRIOT
6654 0x1083D, // 1083D..1083E; UNKNOWN
6655 0x1083F, // 1083F ; CYPRIOT
6656 0x10840, // 10840..10855; IMPERIAL_ARAMAIC
6657 0x10856, // 10856 ; UNKNOWN
6658 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC
6659 0x10860, // 10860..1087F; PALMYRENE
6660 0x10880, // 10880..1089E; NABATAEAN
6661 0x1089F, // 1089F..108A6; UNKNOWN
6662 0x108A7, // 108A7..108AF; NABATAEAN
6663 0x108B0, // 108B0..108DF; UNKNOWN
6664 0x108E0, // 108E0..108F2; HATRAN
6665 0x108F3, // 108F3 ; UNKNOWN
6666 0x108F4, // 108F4..108F5; HATRAN
6667 0x108F6, // 108F6..108FA; UNKNOWN
6668 0x108FB, // 108FB..108FF; HATRAN
6669 0x10900, // 10900..1091B; PHOENICIAN
6670 0x1091C, // 1091C..1091E; UNKNOWN
6671 0x1091F, // 1091F ; PHOENICIAN
6672 0x10920, // 10920..10939; LYDIAN
6673 0x1093A, // 1093A..1093E; UNKNOWN
6674 0x1093F, // 1093F ; LYDIAN
6675 0x10940, // 10940..10959; SIDETIC
6676 0x1095A, // 1095A..1097F; UNKNOWN
6677 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS
6678 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE
6679 0x109B8, // 109B8..109BB; UNKNOWN
6680 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE
6681 0x109D0, // 109D0..109D1; UNKNOWN
6682 0x109D2, // 109D2..109FF; MEROITIC_CURSIVE
6683 0x10A00, // 10A00..10A03; KHAROSHTHI
6684 0x10A04, // 10A04 ; UNKNOWN
6685 0x10A05, // 10A05..10A06; KHAROSHTHI
6686 0x10A07, // 10A07..10A0B; UNKNOWN
6687 0x10A0C, // 10A0C..10A13; KHAROSHTHI
6688 0x10A14, // 10A14 ; UNKNOWN
6689 0x10A15, // 10A15..10A17; KHAROSHTHI
6690 0x10A18, // 10A18 ; UNKNOWN
6691 0x10A19, // 10A19..10A35; KHAROSHTHI
6692 0x10A36, // 10A36..10A37; UNKNOWN
6693 0x10A38, // 10A38..10A3A; KHAROSHTHI
6694 0x10A3B, // 10A3B..10A3E; UNKNOWN
6695 0x10A3F, // 10A3F..10A48; KHAROSHTHI
6696 0x10A49, // 10A49..10A4F; UNKNOWN
6697 0x10A50, // 10A50..10A58; KHAROSHTHI
6698 0x10A59, // 10A59..10A5F; UNKNOWN
6699 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN
6700 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN
6701 0x10AA0, // 10AA0..10ABF; UNKNOWN
6702 0x10AC0, // 10AC0..10AE6; MANICHAEAN
6703 0x10AE7, // 10AE7..10AEA; UNKNOWN
6704 0x10AEB, // 10AEB..10AF6; MANICHAEAN
6705 0x10AF7, // 10AF7..10AFF; UNKNOWN
6706 0x10B00, // 10B00..10B35; AVESTAN
6707 0x10B36, // 10B36..10B38; UNKNOWN
6708 0x10B39, // 10B39..10B3F; AVESTAN
6709 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN
6710 0x10B56, // 10B56..10B57; UNKNOWN
6711 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN
6712 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI
6713 0x10B73, // 10B73..10B77; UNKNOWN
6714 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI
6715 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI
6716 0x10B92, // 10B92..10B98; UNKNOWN
6717 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI
6718 0x10B9D, // 10B9D..10BA8; UNKNOWN
6719 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI
6720 0x10BB0, // 10BB0..10BFF; UNKNOWN
6721 0x10C00, // 10C00..10C48; OLD_TURKIC
6722 0x10C49, // 10C49..10C7F; UNKNOWN
6723 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN
6724 0x10CB3, // 10CB3..10CBF; UNKNOWN
6725 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN
6726 0x10CF3, // 10CF3..10CF9; UNKNOWN
6727 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN
6728 0x10D00, // 10D00..10D27; HANIFI_ROHINGYA
6729 0x10D28, // 10D28..10D2F; UNKNOWN
6730 0x10D30, // 10D30..10D39; HANIFI_ROHINGYA
6731 0x10D3A, // 10D3A..10D3F; UNKNOWN
6732 0x10D40, // 10D40..10D65; GARAY
6733 0x10D66, // 10D66..10D68; UNKNOWN
6734 0x10D69, // 10D69..10D85; GARAY
6735 0x10D86, // 10D86..10D8D; UNKNOWN
6736 0x10D8E, // 10D8E..10D8F; GARAY
6737 0x10D90, // 10D90..10E5F; UNKNOWN
6738 0x10E60, // 10E60..10E7E; ARABIC
6739 0x10E7F, // 10E7F ; UNKNOWN
6740 0x10E80, // 10E80..10EA9; YEZIDI
6741 0x10EAA, // 10EAA ; UNKNOWN
6742 0x10EAB, // 10EAB..10EAD; YEZIDI
6743 0x10EAE, // 10EAE..10EAF; UNKNOWN
6744 0x10EB0, // 10EB0..10EB1; YEZIDI
6745 0x10EB2, // 10EB2..10EC1; UNKNOWN
6746 0x10EC2, // 10EC2..10EC7; ARABIC
6747 0x10EC8, // 10EC8..10ECF; UNKNOWN
6748 0x10ED0, // 10ED0..10ED8; ARABIC
6749 0x10ED9, // 10ED9..10EF9; UNKNOWN
6750 0x10EFA, // 10EFA..10EFF; ARABIC
6751 0x10F00, // 10F00..10F27; OLD_SOGDIAN
6752 0x10F28, // 10F28..10F2F; UNKNOWN
6753 0x10F30, // 10F30..10F59; SOGDIAN
6754 0x10F5A, // 10F5A..10F6F; UNKNOWN
6755 0x10F70, // 10F70..10F89; OLD_UYGHUR
6756 0x10F8A, // 10F8A..10FAF; UNKNOWN
6757 0x10FB0, // 10FB0..10FCB; CHORASMIAN
6758 0x10FCC, // 10FCC..10FDF; UNKNOWN
6759 0x10FE0, // 10FE0..10FF6; ELYMAIC
6760 0x10FF7, // 10FF7..10FFF; UNKNOWN
6761 0x11000, // 11000..1104D; BRAHMI
6762 0x1104E, // 1104E..11051; UNKNOWN
6763 0x11052, // 11052..11075; BRAHMI
6764 0x11076, // 11076..1107E; UNKNOWN
6765 0x1107F, // 1107F ; BRAHMI
6766 0x11080, // 11080..110C2; KAITHI
6767 0x110C3, // 110C3..110CC; UNKNOWN
6768 0x110CD, // 110CD ; KAITHI
6769 0x110CE, // 110CE..110CF; UNKNOWN
6770 0x110D0, // 110D0..110E8; SORA_SOMPENG
6771 0x110E9, // 110E9..110EF; UNKNOWN
6772 0x110F0, // 110F0..110F9; SORA_SOMPENG
6773 0x110FA, // 110FA..110FF; UNKNOWN
6774 0x11100, // 11100..11134; CHAKMA
6775 0x11135, // 11135 ; UNKNOWN
6776 0x11136, // 11136..11147; CHAKMA
6777 0x11148, // 11148..1114F; UNKNOWN
6778 0x11150, // 11150..11176; MAHAJANI
6779 0x11177, // 11177..1117F; UNKNOWN
6780 0x11180, // 11180..111DF; SHARADA
6781 0x111E0, // 111E0 ; UNKNOWN
6782 0x111E1, // 111E1..111F4; SINHALA
6783 0x111F5, // 111F5..111FF; UNKNOWN
6784 0x11200, // 11200..11211; KHOJKI
6785 0x11212, // 11212 ; UNKNOWN
6786 0x11213, // 11213..11241; KHOJKI
6787 0x11242, // 11242..1127F; UNKNOWN
6788 0x11280, // 11280..11286; MULTANI
6789 0x11287, // 11287 ; UNKNOWN
6790 0x11288, // 11288 ; MULTANI
6791 0x11289, // 11289 ; UNKNOWN
6792 0x1128A, // 1128A..1128D; MULTANI
6793 0x1128E, // 1128E ; UNKNOWN
6794 0x1128F, // 1128F..1129D; MULTANI
6795 0x1129E, // 1129E ; UNKNOWN
6796 0x1129F, // 1129F..112A9; MULTANI
6797 0x112AA, // 112AA..112AF; UNKNOWN
6798 0x112B0, // 112B0..112EA; KHUDAWADI
6799 0x112EB, // 112EB..112EF; UNKNOWN
6800 0x112F0, // 112F0..112F9; KHUDAWADI
6801 0x112FA, // 112FA..112FF; UNKNOWN
6802 0x11300, // 11300..11303; GRANTHA
6803 0x11304, // 11304 ; UNKNOWN
6804 0x11305, // 11305..1130C; GRANTHA
6805 0x1130D, // 1130D..1130E; UNKNOWN
6806 0x1130F, // 1130F..11310; GRANTHA
6807 0x11311, // 11311..11312; UNKNOWN
6808 0x11313, // 11313..11328; GRANTHA
6809 0x11329, // 11329 ; UNKNOWN
6810 0x1132A, // 1132A..11330; GRANTHA
6811 0x11331, // 11331 ; UNKNOWN
6812 0x11332, // 11332..11333; GRANTHA
6813 0x11334, // 11334 ; UNKNOWN
6814 0x11335, // 11335..11339; GRANTHA
6815 0x1133A, // 1133A ; UNKNOWN
6816 0x1133B, // 1133B ; INHERITED
6817 0x1133C, // 1133C..11344; GRANTHA
6818 0x11345, // 11345..11346; UNKNOWN
6819 0x11347, // 11347..11348; GRANTHA
6820 0x11349, // 11349..1134A; UNKNOWN
6821 0x1134B, // 1134B..1134D; GRANTHA
6822 0x1134E, // 1134E..1134F; UNKNOWN
6823 0x11350, // 11350 ; GRANTHA
6824 0x11351, // 11351..11356; UNKNOWN
6825 0x11357, // 11357 ; GRANTHA
6826 0x11358, // 11358..1135C; UNKNOWN
6827 0x1135D, // 1135D..11363; GRANTHA
6828 0x11364, // 11364..11365; UNKNOWN
6829 0x11366, // 11366..1136C; GRANTHA
6830 0x1136D, // 1136D..1136F; UNKNOWN
6831 0x11370, // 11370..11374; GRANTHA
6832 0x11375, // 11375..1137F; UNKNOWN
6833 0x11380, // 11380..11389; TULU_TIGALARI
6834 0x1138A, // 1138A ; UNKNOWN
6835 0x1138B, // 1138B ; TULU_TIGALARI
6836 0x1138C, // 1138C..1138D; UNKNOWN
6837 0x1138E, // 1138E ; TULU_TIGALARI
6838 0x1138F, // 1138F ; UNKNOWN
6839 0x11390, // 11390..113B5; TULU_TIGALARI
6840 0x113B6, // 113B6 ; UNKNOWN
6841 0x113B7, // 113B7..113C0; TULU_TIGALARI
6842 0x113C1, // 113C1 ; UNKNOWN
6843 0x113C2, // 113C2 ; TULU_TIGALARI
6844 0x113C3, // 113C3..113C4; UNKNOWN
6845 0x113C5, // 113C5 ; TULU_TIGALARI
6846 0x113C6, // 113C6 ; UNKNOWN
6847 0x113C7, // 113C7..113CA; TULU_TIGALARI
6848 0x113CB, // 113CB ; UNKNOWN
6849 0x113CC, // 113CC..113D5; TULU_TIGALARI
6850 0x113D6, // 113D6 ; UNKNOWN
6851 0x113D7, // 113D7..113D8; TULU_TIGALARI
6852 0x113D9, // 113D9..113E0; UNKNOWN
6853 0x113E1, // 113E1..113E2; TULU_TIGALARI
6854 0x113E3, // 113E3..113FF; UNKNOWN
6855 0x11400, // 11400..1145B; NEWA
6856 0x1145C, // 1145C ; UNKNOWN
6857 0x1145D, // 1145D..11461; NEWA
6858 0x11462, // 11462..1147F; UNKNOWN
6859 0x11480, // 11480..114C7; TIRHUTA
6860 0x114C8, // 114C8..114CF; UNKNOWN
6861 0x114D0, // 114D0..114D9; TIRHUTA
6862 0x114DA, // 114DA..1157F; UNKNOWN
6863 0x11580, // 11580..115B5; SIDDHAM
6864 0x115B6, // 115B6..115B7; UNKNOWN
6865 0x115B8, // 115B8..115DD; SIDDHAM
6866 0x115DE, // 115DE..115FF; UNKNOWN
6867 0x11600, // 11600..11644; MODI
6868 0x11645, // 11645..1164F; UNKNOWN
6869 0x11650, // 11650..11659; MODI
6870 0x1165A, // 1165A..1165F; UNKNOWN
6871 0x11660, // 11660..1166C; MONGOLIAN
6872 0x1166D, // 1166D..1167F; UNKNOWN
6873 0x11680, // 11680..116B9; TAKRI
6874 0x116BA, // 116BA..116BF; UNKNOWN
6875 0x116C0, // 116C0..116C9; TAKRI
6876 0x116CA, // 116CA..116CF; UNKNOWN
6877 0x116D0, // 116D0..116E3; MYANMAR
6878 0x116E4, // 116E4..116FF; UNKNOWN
6879 0x11700, // 11700..1171A; AHOM
6880 0x1171B, // 1171B..1171C; UNKNOWN
6881 0x1171D, // 1171D..1172B; AHOM
6882 0x1172C, // 1172C..1172F; UNKNOWN
6883 0x11730, // 11730..11746; AHOM
6884 0x11747, // 11747..117FF; UNKNOWN
6885 0x11800, // 11800..1183B; DOGRA
6886 0x1183C, // 1183C..1189F; UNKNOWN
6887 0x118A0, // 118A0..118F2; WARANG_CITI
6888 0x118F3, // 118F3..118FE; UNKNOWN
6889 0x118FF, // 118FF ; WARANG_CITI
6890 0x11900, // 11900..11906; DIVES_AKURU
6891 0x11907, // 11907..11908; UNKNOWN
6892 0x11909, // 11909 ; DIVES_AKURU
6893 0x1190A, // 1190A..1190B; UNKNOWN
6894 0x1190C, // 1190C..11913; DIVES_AKURU
6895 0x11914, // 11914 ; UNKNOWN
6896 0x11915, // 11915..11916; DIVES_AKURU
6897 0x11917, // 11917 ; UNKNOWN
6898 0x11918, // 11918..11935; DIVES_AKURU
6899 0x11936, // 11936 ; UNKNOWN
6900 0x11937, // 11937..11938; DIVES_AKURU
6901 0x11939, // 11939..1193A; UNKNOWN
6902 0x1193B, // 1193B..11946; DIVES_AKURU
6903 0x11947, // 11947..1194F; UNKNOWN
6904 0x11950, // 11950..11959; DIVES_AKURU
6905 0x1195A, // 1195A..1199F; UNKNOWN
6906 0x119A0, // 119A0..119A7; NANDINAGARI
6907 0x119A8, // 119A8..119A9; UNKNOWN
6908 0x119AA, // 119AA..119D7; NANDINAGARI
6909 0x119D8, // 119D8..119D9; UNKNOWN
6910 0x119DA, // 119DA..119E4; NANDINAGARI
6911 0x119E5, // 119E5..119FF; UNKNOWN
6912 0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE
6913 0x11A48, // 11A48..11A4F; UNKNOWN
6914 0x11A50, // 11A50..11AA2; SOYOMBO
6915 0x11AA3, // 11AA3..11AAF; UNKNOWN
6916 0x11AB0, // 11AB0..11ABF; CANADIAN_ABORIGINAL
6917 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU
6918 0x11AF9, // 11AF9..11AFF; UNKNOWN
6919 0x11B00, // 11B00..11B09; DEVANAGARI
6920 0x11B0A, // 11B0A..11B5F; UNKNOWN
6921 0x11B60, // 11B60..11B67; SHARADA
6922 0x11B68, // 11B68..11BBF; UNKNOWN
6923 0x11BC0, // 11BC0..11BE1; SUNUWAR
6924 0x11BE2, // 11BE2..11BEF; UNKNOWN
6925 0x11BF0, // 11BF0..11BF9; SUNUWAR
6926 0x11BFA, // 11BFA..11BFF; UNKNOWN
6927 0x11C00, // 11C00..11C08; BHAIKSUKI
6928 0x11C09, // 11C09 ; UNKNOWN
6929 0x11C0A, // 11C0A..11C36; BHAIKSUKI
6930 0x11C37, // 11C37 ; UNKNOWN
6931 0x11C38, // 11C38..11C45; BHAIKSUKI
6932 0x11C46, // 11C46..11C4F; UNKNOWN
6933 0x11C50, // 11C50..11C6C; BHAIKSUKI
6934 0x11C6D, // 11C6D..11C6F; UNKNOWN
6935 0x11C70, // 11C70..11C8F; MARCHEN
6936 0x11C90, // 11C90..11C91; UNKNOWN
6937 0x11C92, // 11C92..11CA7; MARCHEN
6938 0x11CA8, // 11CA8 ; UNKNOWN
6939 0x11CA9, // 11CA9..11CB6; MARCHEN
6940 0x11CB7, // 11CB7..11CFF; UNKNOWN
6941 0x11D00, // 11D00..11D06; MASARAM_GONDI
6942 0x11D07, // 11D07 ; UNKNOWN
6943 0x11D08, // 11D08..11D09; MASARAM_GONDI
6944 0x11D0A, // 11D0A ; UNKNOWN
6945 0x11D0B, // 11D0B..11D36; MASARAM_GONDI
6946 0x11D37, // 11D37..11D39; UNKNOWN
6947 0x11D3A, // 11D3A ; MASARAM_GONDI
6948 0x11D3B, // 11D3B ; UNKNOWN
6949 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI
6950 0x11D3E, // 11D3E ; UNKNOWN
6951 0x11D3F, // 11D3F..11D47; MASARAM_GONDI
6952 0x11D48, // 11D48..11D4F; UNKNOWN
6953 0x11D50, // 11D50..11D59; MASARAM_GONDI
6954 0x11D5A, // 11D5A..11D5F; UNKNOWN
6955 0x11D60, // 11D60..11D65; GUNJALA_GONDI
6956 0x11D66, // 11D66 ; UNKNOWN
6957 0x11D67, // 11D67..11D68; GUNJALA_GONDI
6958 0x11D69, // 11D69 ; UNKNOWN
6959 0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI
6960 0x11D8F, // 11D8F ; UNKNOWN
6961 0x11D90, // 11D90..11D91; GUNJALA_GONDI
6962 0x11D92, // 11D92 ; UNKNOWN
6963 0x11D93, // 11D93..11D98; GUNJALA_GONDI
6964 0x11D99, // 11D99..11D9F; UNKNOWN
6965 0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI
6966 0x11DAA, // 11DAA..11DAF; UNKNOWN
6967 0x11DB0, // 11DB0..11DDB; TOLONG_SIKI
6968 0x11DDC, // 11DDC..11DDF; UNKNOWN
6969 0x11DE0, // 11DE0..11DE9; TOLONG_SIKI
6970 0x11DEA, // 11DEA..11EDF; UNKNOWN
6971 0x11EE0, // 11EE0..11EF8; MAKASAR
6972 0x11EF9, // 11EF9..11EFF; UNKNOWN
6973 0x11F00, // 11F00..11F10; KAWI
6974 0x11F11, // 11F11 ; UNKNOWN
6975 0x11F12, // 11F12..11F3A; KAWI
6976 0x11F3B, // 11F3B..11F3D; UNKNOWN
6977 0x11F3E, // 11F3E..11F5A; KAWI
6978 0x11F5B, // 11F5B..11FAF; UNKNOWN
6979 0x11FB0, // 11FB0 ; LISU
6980 0x11FB1, // 11FB1..11FBF; UNKNOWN
6981 0x11FC0, // 11FC0..11FF1; TAMIL
6982 0x11FF2, // 11FF2..11FFE; UNKNOWN
6983 0x11FFF, // 11FFF ; TAMIL
6984 0x12000, // 12000..12399; CUNEIFORM
6985 0x1239A, // 1239A..123FF; UNKNOWN
6986 0x12400, // 12400..1246E; CUNEIFORM
6987 0x1246F, // 1246F ; UNKNOWN
6988 0x12470, // 12470..12474; CUNEIFORM
6989 0x12475, // 12475..1247F; UNKNOWN
6990 0x12480, // 12480..12543; CUNEIFORM
6991 0x12544, // 12544..12F8F; UNKNOWN
6992 0x12F90, // 12F90..12FF2; CYPRO_MINOAN
6993 0x12FF3, // 12FF3..12FFF; UNKNOWN
6994 0x13000, // 13000..13455; EGYPTIAN_HIEROGLYPHS
6995 0x13456, // 13456..1345F; UNKNOWN
6996 0x13460, // 13460..143FA; EGYPTIAN_HIEROGLYPHS
6997 0x143FB, // 143FB..143FF; UNKNOWN
6998 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS
6999 0x14647, // 14647..160FF; UNKNOWN
7000 0x16100, // 16100..16139; GURUNG_KHEMA
7001 0x1613A, // 1613A..167FF; UNKNOWN
7002 0x16800, // 16800..16A38; BAMUM
7003 0x16A39, // 16A39..16A3F; UNKNOWN
7004 0x16A40, // 16A40..16A5E; MRO
7005 0x16A5F, // 16A5F ; UNKNOWN
7006 0x16A60, // 16A60..16A69; MRO
7007 0x16A6A, // 16A6A..16A6D; UNKNOWN
7008 0x16A6E, // 16A6E..16A6F; MRO
7009 0x16A70, // 16A70..16ABE; TANGSA
7010 0x16ABF, // 16ABF ; UNKNOWN
7011 0x16AC0, // 16AC0..16AC9; TANGSA
7012 0x16ACA, // 16ACA..16ACF; UNKNOWN
7013 0x16AD0, // 16AD0..16AED; BASSA_VAH
7014 0x16AEE, // 16AEE..16AEF; UNKNOWN
7015 0x16AF0, // 16AF0..16AF5; BASSA_VAH
7016 0x16AF6, // 16AF6..16AFF; UNKNOWN
7017 0x16B00, // 16B00..16B45; PAHAWH_HMONG
7018 0x16B46, // 16B46..16B4F; UNKNOWN
7019 0x16B50, // 16B50..16B59; PAHAWH_HMONG
7020 0x16B5A, // 16B5A ; UNKNOWN
7021 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG
7022 0x16B62, // 16B62 ; UNKNOWN
7023 0x16B63, // 16B63..16B77; PAHAWH_HMONG
7024 0x16B78, // 16B78..16B7C; UNKNOWN
7025 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG
7026 0x16B90, // 16B90..16D3F; UNKNOWN
7027 0x16D40, // 16D40..16D79; KIRAT_RAI
7028 0x16D7A, // 16D7A..16E3F; UNKNOWN
7029 0x16E40, // 16E40..16E9A; MEDEFAIDRIN
7030 0x16E9B, // 16E9B..16E9F; UNKNOWN
7031 0x16EA0, // 16EA0..16EB8; BERIA_ERFE
7032 0x16EB9, // 16EB9..16EBA; UNKNOWN
7033 0x16EBB, // 16EBB..16ED3; BERIA_ERFE
7034 0x16ED4, // 16ED4..16EFF; UNKNOWN
7035 0x16F00, // 16F00..16F4A; MIAO
7036 0x16F4B, // 16F4B..16F4E; UNKNOWN
7037 0x16F4F, // 16F4F..16F87; MIAO
7038 0x16F88, // 16F88..16F8E; UNKNOWN
7039 0x16F8F, // 16F8F..16F9F; MIAO
7040 0x16FA0, // 16FA0..16FDF; UNKNOWN
7041 0x16FE0, // 16FE0 ; TANGUT
7042 0x16FE1, // 16FE1 ; NUSHU
7043 0x16FE2, // 16FE2..16FE3; HAN
7044 0x16FE4, // 16FE4 ; KHITAN_SMALL_SCRIPT
7045 0x16FE5, // 16FE5..16FEF; UNKNOWN
7046 0x16FF0, // 16FF0..16FF6; HAN
7047 0x16FF7, // 16FF7..16FFF; UNKNOWN
7048 0x17000, // 17000..18AFF; TANGUT
7049 0x18B00, // 18B00..18CD5; KHITAN_SMALL_SCRIPT
7050 0x18CD6, // 18CD6..18CFE; UNKNOWN
7051 0x18CFF, // 18CFF ; KHITAN_SMALL_SCRIPT
7052 0x18D00, // 18D00..18D1E; TANGUT
7053 0x18D1F, // 18D1F..18D7F; UNKNOWN
7054 0x18D80, // 18D80..18DF2; TANGUT
7055 0x18DF3, // 18DF3..1AFEF; UNKNOWN
7056 0x1AFF0, // 1AFF0..1AFF3; KATAKANA
7057 0x1AFF4, // 1AFF4 ; UNKNOWN
7058 0x1AFF5, // 1AFF5..1AFFB; KATAKANA
7059 0x1AFFC, // 1AFFC ; UNKNOWN
7060 0x1AFFD, // 1AFFD..1AFFE; KATAKANA
7061 0x1AFFF, // 1AFFF ; UNKNOWN
7062 0x1B000, // 1B000 ; KATAKANA
7063 0x1B001, // 1B001..1B11F; HIRAGANA
7064 0x1B120, // 1B120..1B122; KATAKANA
7065 0x1B123, // 1B123..1B131; UNKNOWN
7066 0x1B132, // 1B132 ; HIRAGANA
7067 0x1B133, // 1B133..1B14F; UNKNOWN
7068 0x1B150, // 1B150..1B152; HIRAGANA
7069 0x1B153, // 1B153..1B154; UNKNOWN
7070 0x1B155, // 1B155 ; KATAKANA
7071 0x1B156, // 1B156..1B163; UNKNOWN
7072 0x1B164, // 1B164..1B167; KATAKANA
7073 0x1B168, // 1B168..1B16F; UNKNOWN
7074 0x1B170, // 1B170..1B2FB; NUSHU
7075 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN
7076 0x1BC00, // 1BC00..1BC6A; DUPLOYAN
7077 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN
7078 0x1BC70, // 1BC70..1BC7C; DUPLOYAN
7079 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN
7080 0x1BC80, // 1BC80..1BC88; DUPLOYAN
7081 0x1BC89, // 1BC89..1BC8F; UNKNOWN
7082 0x1BC90, // 1BC90..1BC99; DUPLOYAN
7083 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN
7084 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN
7085 0x1BCA0, // 1BCA0..1BCA3; COMMON
7086 0x1BCA4, // 1BCA4..1CBFF; UNKNOWN
7087 0x1CC00, // 1CC00..1CCFC; COMMON
7088 0x1CCFD, // 1CCFD..1CCFF; UNKNOWN
7089 0x1CD00, // 1CD00..1CEB3; COMMON
7090 0x1CEB4, // 1CEB4..1CEB9; UNKNOWN
7091 0x1CEBA, // 1CEBA..1CED0; COMMON
7092 0x1CED1, // 1CED1..1CEDF; UNKNOWN
7093 0x1CEE0, // 1CEE0..1CEF0; COMMON
7094 0x1CEF1, // 1CEF1..1CEFF; UNKNOWN
7095 0x1CF00, // 1CF00..1CF2D; INHERITED
7096 0x1CF2E, // 1CF2E..1CF2F; UNKNOWN
7097 0x1CF30, // 1CF30..1CF46; INHERITED
7098 0x1CF47, // 1CF47..1CF4F; UNKNOWN
7099 0x1CF50, // 1CF50..1CFC3; COMMON
7100 0x1CFC4, // 1CFC4..1CFFF; UNKNOWN
7101 0x1D000, // 1D000..1D0F5; COMMON
7102 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN
7103 0x1D100, // 1D100..1D126; COMMON
7104 0x1D127, // 1D127..1D128; UNKNOWN
7105 0x1D129, // 1D129..1D166; COMMON
7106 0x1D167, // 1D167..1D169; INHERITED
7107 0x1D16A, // 1D16A..1D17A; COMMON
7108 0x1D17B, // 1D17B..1D182; INHERITED
7109 0x1D183, // 1D183..1D184; COMMON
7110 0x1D185, // 1D185..1D18B; INHERITED
7111 0x1D18C, // 1D18C..1D1A9; COMMON
7112 0x1D1AA, // 1D1AA..1D1AD; INHERITED
7113 0x1D1AE, // 1D1AE..1D1EA; COMMON
7114 0x1D1EB, // 1D1EB..1D1FF; UNKNOWN
7115 0x1D200, // 1D200..1D245; GREEK
7116 0x1D246, // 1D246..1D2BF; UNKNOWN
7117 0x1D2C0, // 1D2C0..1D2D3; COMMON
7118 0x1D2D4, // 1D2D4..1D2DF; UNKNOWN
7119 0x1D2E0, // 1D2E0..1D2F3; COMMON
7120 0x1D2F4, // 1D2F4..1D2FF; UNKNOWN
7121 0x1D300, // 1D300..1D356; COMMON
7122 0x1D357, // 1D357..1D35F; UNKNOWN
7123 0x1D360, // 1D360..1D378; COMMON
7124 0x1D379, // 1D379..1D3FF; UNKNOWN
7125 0x1D400, // 1D400..1D454; COMMON
7126 0x1D455, // 1D455 ; UNKNOWN
7127 0x1D456, // 1D456..1D49C; COMMON
7128 0x1D49D, // 1D49D ; UNKNOWN
7129 0x1D49E, // 1D49E..1D49F; COMMON
7130 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN
7131 0x1D4A2, // 1D4A2 ; COMMON
7132 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN
7133 0x1D4A5, // 1D4A5..1D4A6; COMMON
7134 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN
7135 0x1D4A9, // 1D4A9..1D4AC; COMMON
7136 0x1D4AD, // 1D4AD ; UNKNOWN
7137 0x1D4AE, // 1D4AE..1D4B9; COMMON
7138 0x1D4BA, // 1D4BA ; UNKNOWN
7139 0x1D4BB, // 1D4BB ; COMMON
7140 0x1D4BC, // 1D4BC ; UNKNOWN
7141 0x1D4BD, // 1D4BD..1D4C3; COMMON
7142 0x1D4C4, // 1D4C4 ; UNKNOWN
7143 0x1D4C5, // 1D4C5..1D505; COMMON
7144 0x1D506, // 1D506 ; UNKNOWN
7145 0x1D507, // 1D507..1D50A; COMMON
7146 0x1D50B, // 1D50B..1D50C; UNKNOWN
7147 0x1D50D, // 1D50D..1D514; COMMON
7148 0x1D515, // 1D515 ; UNKNOWN
7149 0x1D516, // 1D516..1D51C; COMMON
7150 0x1D51D, // 1D51D ; UNKNOWN
7151 0x1D51E, // 1D51E..1D539; COMMON
7152 0x1D53A, // 1D53A ; UNKNOWN
7153 0x1D53B, // 1D53B..1D53E; COMMON
7154 0x1D53F, // 1D53F ; UNKNOWN
7155 0x1D540, // 1D540..1D544; COMMON
7156 0x1D545, // 1D545 ; UNKNOWN
7157 0x1D546, // 1D546 ; COMMON
7158 0x1D547, // 1D547..1D549; UNKNOWN
7159 0x1D54A, // 1D54A..1D550; COMMON
7160 0x1D551, // 1D551 ; UNKNOWN
7161 0x1D552, // 1D552..1D6A5; COMMON
7162 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN
7163 0x1D6A8, // 1D6A8..1D7CB; COMMON
7164 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN
7165 0x1D7CE, // 1D7CE..1D7FF; COMMON
7166 0x1D800, // 1D800..1DA8B; SIGNWRITING
7167 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN
7168 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING
7169 0x1DAA0, // 1DAA0 ; UNKNOWN
7170 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING
7171 0x1DAB0, // 1DAB0..1DEFF; UNKNOWN
7172 0x1DF00, // 1DF00..1DF1E; LATIN
7173 0x1DF1F, // 1DF1F..1DF24; UNKNOWN
7174 0x1DF25, // 1DF25..1DF2A; LATIN
7175 0x1DF2B, // 1DF2B..1DFFF; UNKNOWN
7176 0x1E000, // 1E000..1E006; GLAGOLITIC
7177 0x1E007, // 1E007 ; UNKNOWN
7178 0x1E008, // 1E008..1E018; GLAGOLITIC
7179 0x1E019, // 1E019..1E01A; UNKNOWN
7180 0x1E01B, // 1E01B..1E021; GLAGOLITIC
7181 0x1E022, // 1E022 ; UNKNOWN
7182 0x1E023, // 1E023..1E024; GLAGOLITIC
7183 0x1E025, // 1E025 ; UNKNOWN
7184 0x1E026, // 1E026..1E02A; GLAGOLITIC
7185 0x1E02B, // 1E02B..1E02F; UNKNOWN
7186 0x1E030, // 1E030..1E06D; CYRILLIC
7187 0x1E06E, // 1E06E..1E08E; UNKNOWN
7188 0x1E08F, // 1E08F ; CYRILLIC
7189 0x1E090, // 1E090..1E0FF; UNKNOWN
7190 0x1E100, // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG
7191 0x1E12D, // 1E12D..1E12F; UNKNOWN
7192 0x1E130, // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG
7193 0x1E13E, // 1E13E..1E13F; UNKNOWN
7194 0x1E140, // 1E140..1E149; NYIAKENG_PUACHUE_HMONG
7195 0x1E14A, // 1E14A..1E14D; UNKNOWN
7196 0x1E14E, // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG
7197 0x1E150, // 1E150..1E28F; UNKNOWN
7198 0x1E290, // 1E290..1E2AE; TOTO
7199 0x1E2AF, // 1E2AF..1E2BF; UNKNOWN
7200 0x1E2C0, // 1E2C0..1E2F9; WANCHO
7201 0x1E2FA, // 1E2FA..1E2FE; UNKNOWN
7202 0x1E2FF, // 1E2FF ; WANCHO
7203 0x1E300, // 1E300..1E4CF; UNKNOWN
7204 0x1E4D0, // 1E4D0..1E4F9; NAG_MUNDARI
7205 0x1E4FA, // 1E4FA..1E5CF; UNKNOWN
7206 0x1E5D0, // 1E5D0..1E5FA; OL_ONAL
7207 0x1E5FB, // 1E5FB..1E5FE; UNKNOWN
7208 0x1E5FF, // 1E5FF ; OL_ONAL
7209 0x1E600, // 1E600..1E6BF; UNKNOWN
7210 0x1E6C0, // 1E6C0..1E6DE; TAI_YO
7211 0x1E6DF, // 1E6DF ; UNKNOWN
7212 0x1E6E0, // 1E6E0..1E6F5; TAI_YO
7213 0x1E6F6, // 1E6F6..1E6FD; UNKNOWN
7214 0x1E6FE, // 1E6FE..1E6FF; TAI_YO
7215 0x1E700, // 1E700..1E7DF; UNKNOWN
7216 0x1E7E0, // 1E7E0..1E7E6; ETHIOPIC
7217 0x1E7E7, // 1E7E7 ; UNKNOWN
7218 0x1E7E8, // 1E7E8..1E7EB; ETHIOPIC
7219 0x1E7EC, // 1E7EC ; UNKNOWN
7220 0x1E7ED, // 1E7ED..1E7EE; ETHIOPIC
7221 0x1E7EF, // 1E7EF ; UNKNOWN
7222 0x1E7F0, // 1E7F0..1E7FE; ETHIOPIC
7223 0x1E7FF, // 1E7FF ; UNKNOWN
7224 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI
7225 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN
7226 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI
7227 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN
7228 0x1E900, // 1E900..1E94B; ADLAM
7229 0x1E94C, // 1E94C..1E94F; UNKNOWN
7230 0x1E950, // 1E950..1E959; ADLAM
7231 0x1E95A, // 1E95A..1E95D; UNKNOWN
7232 0x1E95E, // 1E95E..1E95F; ADLAM
7233 0x1E960, // 1E960..1EC70; UNKNOWN
7234 0x1EC71, // 1EC71..1ECB4; COMMON
7235 0x1ECB5, // 1ECB5..1ED00; UNKNOWN
7236 0x1ED01, // 1ED01..1ED3D; COMMON
7237 0x1ED3E, // 1ED3E..1EDFF; UNKNOWN
7238 0x1EE00, // 1EE00..1EE03; ARABIC
7239 0x1EE04, // 1EE04 ; UNKNOWN
7240 0x1EE05, // 1EE05..1EE1F; ARABIC
7241 0x1EE20, // 1EE20 ; UNKNOWN
7242 0x1EE21, // 1EE21..1EE22; ARABIC
7243 0x1EE23, // 1EE23 ; UNKNOWN
7244 0x1EE24, // 1EE24 ; ARABIC
7245 0x1EE25, // 1EE25..1EE26; UNKNOWN
7246 0x1EE27, // 1EE27 ; ARABIC
7247 0x1EE28, // 1EE28 ; UNKNOWN
7248 0x1EE29, // 1EE29..1EE32; ARABIC
7249 0x1EE33, // 1EE33 ; UNKNOWN
7250 0x1EE34, // 1EE34..1EE37; ARABIC
7251 0x1EE38, // 1EE38 ; UNKNOWN
7252 0x1EE39, // 1EE39 ; ARABIC
7253 0x1EE3A, // 1EE3A ; UNKNOWN
7254 0x1EE3B, // 1EE3B ; ARABIC
7255 0x1EE3C, // 1EE3C..1EE41; UNKNOWN
7256 0x1EE42, // 1EE42 ; ARABIC
7257 0x1EE43, // 1EE43..1EE46; UNKNOWN
7258 0x1EE47, // 1EE47 ; ARABIC
7259 0x1EE48, // 1EE48 ; UNKNOWN
7260 0x1EE49, // 1EE49 ; ARABIC
7261 0x1EE4A, // 1EE4A ; UNKNOWN
7262 0x1EE4B, // 1EE4B ; ARABIC
7263 0x1EE4C, // 1EE4C ; UNKNOWN
7264 0x1EE4D, // 1EE4D..1EE4F; ARABIC
7265 0x1EE50, // 1EE50 ; UNKNOWN
7266 0x1EE51, // 1EE51..1EE52; ARABIC
7267 0x1EE53, // 1EE53 ; UNKNOWN
7268 0x1EE54, // 1EE54 ; ARABIC
7269 0x1EE55, // 1EE55..1EE56; UNKNOWN
7270 0x1EE57, // 1EE57 ; ARABIC
7271 0x1EE58, // 1EE58 ; UNKNOWN
7272 0x1EE59, // 1EE59 ; ARABIC
7273 0x1EE5A, // 1EE5A ; UNKNOWN
7274 0x1EE5B, // 1EE5B ; ARABIC
7275 0x1EE5C, // 1EE5C ; UNKNOWN
7276 0x1EE5D, // 1EE5D ; ARABIC
7277 0x1EE5E, // 1EE5E ; UNKNOWN
7278 0x1EE5F, // 1EE5F ; ARABIC
7279 0x1EE60, // 1EE60 ; UNKNOWN
7280 0x1EE61, // 1EE61..1EE62; ARABIC
7281 0x1EE63, // 1EE63 ; UNKNOWN
7282 0x1EE64, // 1EE64 ; ARABIC
7283 0x1EE65, // 1EE65..1EE66; UNKNOWN
7284 0x1EE67, // 1EE67..1EE6A; ARABIC
7285 0x1EE6B, // 1EE6B ; UNKNOWN
7286 0x1EE6C, // 1EE6C..1EE72; ARABIC
7287 0x1EE73, // 1EE73 ; UNKNOWN
7288 0x1EE74, // 1EE74..1EE77; ARABIC
7289 0x1EE78, // 1EE78 ; UNKNOWN
7290 0x1EE79, // 1EE79..1EE7C; ARABIC
7291 0x1EE7D, // 1EE7D ; UNKNOWN
7292 0x1EE7E, // 1EE7E ; ARABIC
7293 0x1EE7F, // 1EE7F ; UNKNOWN
7294 0x1EE80, // 1EE80..1EE89; ARABIC
7295 0x1EE8A, // 1EE8A ; UNKNOWN
7296 0x1EE8B, // 1EE8B..1EE9B; ARABIC
7297 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN
7298 0x1EEA1, // 1EEA1..1EEA3; ARABIC
7299 0x1EEA4, // 1EEA4 ; UNKNOWN
7300 0x1EEA5, // 1EEA5..1EEA9; ARABIC
7301 0x1EEAA, // 1EEAA ; UNKNOWN
7302 0x1EEAB, // 1EEAB..1EEBB; ARABIC
7303 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN
7304 0x1EEF0, // 1EEF0..1EEF1; ARABIC
7305 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN
7306 0x1F000, // 1F000..1F02B; COMMON
7307 0x1F02C, // 1F02C..1F02F; UNKNOWN
7308 0x1F030, // 1F030..1F093; COMMON
7309 0x1F094, // 1F094..1F09F; UNKNOWN
7310 0x1F0A0, // 1F0A0..1F0AE; COMMON
7311 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN
7312 0x1F0B1, // 1F0B1..1F0BF; COMMON
7313 0x1F0C0, // 1F0C0 ; UNKNOWN
7314 0x1F0C1, // 1F0C1..1F0CF; COMMON
7315 0x1F0D0, // 1F0D0 ; UNKNOWN
7316 0x1F0D1, // 1F0D1..1F0F5; COMMON
7317 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN
7318 0x1F100, // 1F100..1F1AD; COMMON
7319 0x1F1AE, // 1F1AE..1F1E5; UNKNOWN
7320 0x1F1E6, // 1F1E6..1F1FF; COMMON
7321 0x1F200, // 1F200 ; HIRAGANA
7322 0x1F201, // 1F201..1F202; COMMON
7323 0x1F203, // 1F203..1F20F; UNKNOWN
7324 0x1F210, // 1F210..1F23B; COMMON
7325 0x1F23C, // 1F23C..1F23F; UNKNOWN
7326 0x1F240, // 1F240..1F248; COMMON
7327 0x1F249, // 1F249..1F24F; UNKNOWN
7328 0x1F250, // 1F250..1F251; COMMON
7329 0x1F252, // 1F252..1F25F; UNKNOWN
7330 0x1F260, // 1F260..1F265; COMMON
7331 0x1F266, // 1F266..1F2FF; UNKNOWN
7332 0x1F300, // 1F300..1F6D8; COMMON
7333 0x1F6D9, // 1F6D9..1F6DB; UNKNOWN
7334 0x1F6DC, // 1F6DC..1F6EC; COMMON
7335 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN
7336 0x1F6F0, // 1F6F0..1F6FC; COMMON
7337 0x1F6FD, // 1F6FD..1F6FF; UNKNOWN
7338 0x1F700, // 1F700..1F7D9; COMMON
7339 0x1F7DA, // 1F7DA..1F7DF; UNKNOWN
7340 0x1F7E0, // 1F7E0..1F7EB; COMMON
7341 0x1F7EC, // 1F7EC..1F7EF; UNKNOWN
7342 0x1F7F0, // 1F7F0 ; COMMON
7343 0x1F7F1, // 1F7F1..1F7FF; UNKNOWN
7344 0x1F800, // 1F800..1F80B; COMMON
7345 0x1F80C, // 1F80C..1F80F; UNKNOWN
7346 0x1F810, // 1F810..1F847; COMMON
7347 0x1F848, // 1F848..1F84F; UNKNOWN
7348 0x1F850, // 1F850..1F859; COMMON
7349 0x1F85A, // 1F85A..1F85F; UNKNOWN
7350 0x1F860, // 1F860..1F887; COMMON
7351 0x1F888, // 1F888..1F88F; UNKNOWN
7352 0x1F890, // 1F890..1F8AD; COMMON
7353 0x1F8AE, // 1F8AE..1F8AF; UNKNOWN
7354 0x1F8B0, // 1F8B0..1F8BB; COMMON
7355 0x1F8BC, // 1F8BC..1F8BF; UNKNOWN
7356 0x1F8C0, // 1F8C0..1F8C1; COMMON
7357 0x1F8C2, // 1F8C2..1F8CF; UNKNOWN
7358 0x1F8D0, // 1F8D0..1F8D8; COMMON
7359 0x1F8D9, // 1F8D9..1F8FF; UNKNOWN
7360 0x1F900, // 1F900..1FA57; COMMON
7361 0x1FA58, // 1FA58..1FA5F; UNKNOWN
7362 0x1FA60, // 1FA60..1FA6D; COMMON
7363 0x1FA6E, // 1FA6E..1FA6F; UNKNOWN
7364 0x1FA70, // 1FA70..1FA7C; COMMON
7365 0x1FA7D, // 1FA7D..1FA7F; UNKNOWN
7366 0x1FA80, // 1FA80..1FA8A; COMMON
7367 0x1FA8B, // 1FA8B..1FA8D; UNKNOWN
7368 0x1FA8E, // 1FA8E..1FAC6; COMMON
7369 0x1FAC7, // 1FAC7 ; UNKNOWN
7370 0x1FAC8, // 1FAC8 ; COMMON
7371 0x1FAC9, // 1FAC9..1FACC; UNKNOWN
7372 0x1FACD, // 1FACD..1FADC; COMMON
7373 0x1FADD, // 1FADD..1FADE; UNKNOWN
7374 0x1FADF, // 1FADF..1FAEA; COMMON
7375 0x1FAEB, // 1FAEB..1FAEE; UNKNOWN
7376 0x1FAEF, // 1FAEF..1FAF8; COMMON
7377 0x1FAF9, // 1FAF9..1FAFF; UNKNOWN
7378 0x1FB00, // 1FB00..1FB92; COMMON
7379 0x1FB93, // 1FB93 ; UNKNOWN
7380 0x1FB94, // 1FB94..1FBFA; COMMON
7381 0x1FBFB, // 1FBFB..1FFFF; UNKNOWN
7382 0x20000, // 20000..2A6DF; HAN
7383 0x2A6E0, // 2A6E0..2A6FF; UNKNOWN
7384 0x2A700, // 2A700..2B81D; HAN
7385 0x2B81E, // 2B81E..2B81F; UNKNOWN
7386 0x2B820, // 2B820..2CEAD; HAN
7387 0x2CEAE, // 2CEAE..2CEAF; UNKNOWN
7388 0x2CEB0, // 2CEB0..2EBE0; HAN
7389 0x2EBE1, // 2EBE1..2EBEF; UNKNOWN
7390 0x2EBF0, // 2EBF0..2EE5D; HAN
7391 0x2EE5E, // 2EE5E..2F7FF; UNKNOWN
7392 0x2F800, // 2F800..2FA1D; HAN
7393 0x2FA1E, // 2FA1E..2FFFF; UNKNOWN
7394 0x30000, // 30000..3134A; HAN
7395 0x3134B, // 3134B..3134F; UNKNOWN
7396 0x31350, // 31350..33479; HAN
7397 0x3347A, // 3347A..E0000; UNKNOWN
7398 0xE0001, // E0001 ; COMMON
7399 0xE0002, // E0002..E001F; UNKNOWN
7400 0xE0020, // E0020..E007F; COMMON
7401 0xE0080, // E0080..E00FF; UNKNOWN
7402 0xE0100, // E0100..E01EF; INHERITED
7403 0xE01F0, // E01F0..10FFFF; UNKNOWN
7404 };
7405
7406 private static final UnicodeScript[] scripts = {
7407 COMMON, // 0000..0040
7408 LATIN, // 0041..005A
7409 COMMON, // 005B..0060
7410 LATIN, // 0061..007A
7411 COMMON, // 007B..00A9
7412 LATIN, // 00AA
7413 COMMON, // 00AB..00B9
7414 LATIN, // 00BA
7415 COMMON, // 00BB..00BF
7416 LATIN, // 00C0..00D6
7417 COMMON, // 00D7
7418 LATIN, // 00D8..00F6
7419 COMMON, // 00F7
7420 LATIN, // 00F8..02B8
7421 COMMON, // 02B9..02DF
7422 LATIN, // 02E0..02E4
7423 COMMON, // 02E5..02E9
7424 BOPOMOFO, // 02EA..02EB
7425 COMMON, // 02EC..02FF
7426 INHERITED, // 0300..036F
7427 GREEK, // 0370..0373
7428 COMMON, // 0374
7429 GREEK, // 0375..0377
7430 UNKNOWN, // 0378..0379
7431 GREEK, // 037A..037D
7432 COMMON, // 037E
7433 GREEK, // 037F
7434 UNKNOWN, // 0380..0383
7435 GREEK, // 0384
7436 COMMON, // 0385
7437 GREEK, // 0386
7438 COMMON, // 0387
7439 GREEK, // 0388..038A
7440 UNKNOWN, // 038B
7441 GREEK, // 038C
7442 UNKNOWN, // 038D
7443 GREEK, // 038E..03A1
7444 UNKNOWN, // 03A2
7445 GREEK, // 03A3..03E1
7446 COPTIC, // 03E2..03EF
7447 GREEK, // 03F0..03FF
7448 CYRILLIC, // 0400..0484
7449 INHERITED, // 0485..0486
7450 CYRILLIC, // 0487..052F
7451 UNKNOWN, // 0530
7452 ARMENIAN, // 0531..0556
7453 UNKNOWN, // 0557..0558
7454 ARMENIAN, // 0559..058A
7455 UNKNOWN, // 058B..058C
7456 ARMENIAN, // 058D..058F
7457 UNKNOWN, // 0590
7458 HEBREW, // 0591..05C7
7459 UNKNOWN, // 05C8..05CF
7460 HEBREW, // 05D0..05EA
7461 UNKNOWN, // 05EB..05EE
7462 HEBREW, // 05EF..05F4
7463 UNKNOWN, // 05F5..05FF
7464 ARABIC, // 0600..0604
7465 COMMON, // 0605
7466 ARABIC, // 0606..060B
7467 COMMON, // 060C
7468 ARABIC, // 060D..061A
7469 COMMON, // 061B
7470 ARABIC, // 061C..061E
7471 COMMON, // 061F
7472 ARABIC, // 0620..063F
7473 COMMON, // 0640
7474 ARABIC, // 0641..064A
7475 INHERITED, // 064B..0655
7476 ARABIC, // 0656..066F
7477 INHERITED, // 0670
7478 ARABIC, // 0671..06DC
7479 COMMON, // 06DD
7480 ARABIC, // 06DE..06FF
7481 SYRIAC, // 0700..070D
7482 UNKNOWN, // 070E
7483 SYRIAC, // 070F..074A
7484 UNKNOWN, // 074B..074C
7485 SYRIAC, // 074D..074F
7486 ARABIC, // 0750..077F
7487 THAANA, // 0780..07B1
7488 UNKNOWN, // 07B2..07BF
7489 NKO, // 07C0..07FA
7490 UNKNOWN, // 07FB..07FC
7491 NKO, // 07FD..07FF
7492 SAMARITAN, // 0800..082D
7493 UNKNOWN, // 082E..082F
7494 SAMARITAN, // 0830..083E
7495 UNKNOWN, // 083F
7496 MANDAIC, // 0840..085B
7497 UNKNOWN, // 085C..085D
7498 MANDAIC, // 085E
7499 UNKNOWN, // 085F
7500 SYRIAC, // 0860..086A
7501 UNKNOWN, // 086B..086F
7502 ARABIC, // 0870..0891
7503 UNKNOWN, // 0892..0896
7504 ARABIC, // 0897..08E1
7505 COMMON, // 08E2
7506 ARABIC, // 08E3..08FF
7507 DEVANAGARI, // 0900..0950
7508 INHERITED, // 0951..0954
7509 DEVANAGARI, // 0955..0963
7510 COMMON, // 0964..0965
7511 DEVANAGARI, // 0966..097F
7512 BENGALI, // 0980..0983
7513 UNKNOWN, // 0984
7514 BENGALI, // 0985..098C
7515 UNKNOWN, // 098D..098E
7516 BENGALI, // 098F..0990
7517 UNKNOWN, // 0991..0992
7518 BENGALI, // 0993..09A8
7519 UNKNOWN, // 09A9
7520 BENGALI, // 09AA..09B0
7521 UNKNOWN, // 09B1
7522 BENGALI, // 09B2
7523 UNKNOWN, // 09B3..09B5
7524 BENGALI, // 09B6..09B9
7525 UNKNOWN, // 09BA..09BB
7526 BENGALI, // 09BC..09C4
7527 UNKNOWN, // 09C5..09C6
7528 BENGALI, // 09C7..09C8
7529 UNKNOWN, // 09C9..09CA
7530 BENGALI, // 09CB..09CE
7531 UNKNOWN, // 09CF..09D6
7532 BENGALI, // 09D7
7533 UNKNOWN, // 09D8..09DB
7534 BENGALI, // 09DC..09DD
7535 UNKNOWN, // 09DE
7536 BENGALI, // 09DF..09E3
7537 UNKNOWN, // 09E4..09E5
7538 BENGALI, // 09E6..09FE
7539 UNKNOWN, // 09FF..0A00
7540 GURMUKHI, // 0A01..0A03
7541 UNKNOWN, // 0A04
7542 GURMUKHI, // 0A05..0A0A
7543 UNKNOWN, // 0A0B..0A0E
7544 GURMUKHI, // 0A0F..0A10
7545 UNKNOWN, // 0A11..0A12
7546 GURMUKHI, // 0A13..0A28
7547 UNKNOWN, // 0A29
7548 GURMUKHI, // 0A2A..0A30
7549 UNKNOWN, // 0A31
7550 GURMUKHI, // 0A32..0A33
7551 UNKNOWN, // 0A34
7552 GURMUKHI, // 0A35..0A36
7553 UNKNOWN, // 0A37
7554 GURMUKHI, // 0A38..0A39
7555 UNKNOWN, // 0A3A..0A3B
7556 GURMUKHI, // 0A3C
7557 UNKNOWN, // 0A3D
7558 GURMUKHI, // 0A3E..0A42
7559 UNKNOWN, // 0A43..0A46
7560 GURMUKHI, // 0A47..0A48
7561 UNKNOWN, // 0A49..0A4A
7562 GURMUKHI, // 0A4B..0A4D
7563 UNKNOWN, // 0A4E..0A50
7564 GURMUKHI, // 0A51
7565 UNKNOWN, // 0A52..0A58
7566 GURMUKHI, // 0A59..0A5C
7567 UNKNOWN, // 0A5D
7568 GURMUKHI, // 0A5E
7569 UNKNOWN, // 0A5F..0A65
7570 GURMUKHI, // 0A66..0A76
7571 UNKNOWN, // 0A77..0A80
7572 GUJARATI, // 0A81..0A83
7573 UNKNOWN, // 0A84
7574 GUJARATI, // 0A85..0A8D
7575 UNKNOWN, // 0A8E
7576 GUJARATI, // 0A8F..0A91
7577 UNKNOWN, // 0A92
7578 GUJARATI, // 0A93..0AA8
7579 UNKNOWN, // 0AA9
7580 GUJARATI, // 0AAA..0AB0
7581 UNKNOWN, // 0AB1
7582 GUJARATI, // 0AB2..0AB3
7583 UNKNOWN, // 0AB4
7584 GUJARATI, // 0AB5..0AB9
7585 UNKNOWN, // 0ABA..0ABB
7586 GUJARATI, // 0ABC..0AC5
7587 UNKNOWN, // 0AC6
7588 GUJARATI, // 0AC7..0AC9
7589 UNKNOWN, // 0ACA
7590 GUJARATI, // 0ACB..0ACD
7591 UNKNOWN, // 0ACE..0ACF
7592 GUJARATI, // 0AD0
7593 UNKNOWN, // 0AD1..0ADF
7594 GUJARATI, // 0AE0..0AE3
7595 UNKNOWN, // 0AE4..0AE5
7596 GUJARATI, // 0AE6..0AF1
7597 UNKNOWN, // 0AF2..0AF8
7598 GUJARATI, // 0AF9..0AFF
7599 UNKNOWN, // 0B00
7600 ORIYA, // 0B01..0B03
7601 UNKNOWN, // 0B04
7602 ORIYA, // 0B05..0B0C
7603 UNKNOWN, // 0B0D..0B0E
7604 ORIYA, // 0B0F..0B10
7605 UNKNOWN, // 0B11..0B12
7606 ORIYA, // 0B13..0B28
7607 UNKNOWN, // 0B29
7608 ORIYA, // 0B2A..0B30
7609 UNKNOWN, // 0B31
7610 ORIYA, // 0B32..0B33
7611 UNKNOWN, // 0B34
7612 ORIYA, // 0B35..0B39
7613 UNKNOWN, // 0B3A..0B3B
7614 ORIYA, // 0B3C..0B44
7615 UNKNOWN, // 0B45..0B46
7616 ORIYA, // 0B47..0B48
7617 UNKNOWN, // 0B49..0B4A
7618 ORIYA, // 0B4B..0B4D
7619 UNKNOWN, // 0B4E..0B54
7620 ORIYA, // 0B55..0B57
7621 UNKNOWN, // 0B58..0B5B
7622 ORIYA, // 0B5C..0B5D
7623 UNKNOWN, // 0B5E
7624 ORIYA, // 0B5F..0B63
7625 UNKNOWN, // 0B64..0B65
7626 ORIYA, // 0B66..0B77
7627 UNKNOWN, // 0B78..0B81
7628 TAMIL, // 0B82..0B83
7629 UNKNOWN, // 0B84
7630 TAMIL, // 0B85..0B8A
7631 UNKNOWN, // 0B8B..0B8D
7632 TAMIL, // 0B8E..0B90
7633 UNKNOWN, // 0B91
7634 TAMIL, // 0B92..0B95
7635 UNKNOWN, // 0B96..0B98
7636 TAMIL, // 0B99..0B9A
7637 UNKNOWN, // 0B9B
7638 TAMIL, // 0B9C
7639 UNKNOWN, // 0B9D
7640 TAMIL, // 0B9E..0B9F
7641 UNKNOWN, // 0BA0..0BA2
7642 TAMIL, // 0BA3..0BA4
7643 UNKNOWN, // 0BA5..0BA7
7644 TAMIL, // 0BA8..0BAA
7645 UNKNOWN, // 0BAB..0BAD
7646 TAMIL, // 0BAE..0BB9
7647 UNKNOWN, // 0BBA..0BBD
7648 TAMIL, // 0BBE..0BC2
7649 UNKNOWN, // 0BC3..0BC5
7650 TAMIL, // 0BC6..0BC8
7651 UNKNOWN, // 0BC9
7652 TAMIL, // 0BCA..0BCD
7653 UNKNOWN, // 0BCE..0BCF
7654 TAMIL, // 0BD0
7655 UNKNOWN, // 0BD1..0BD6
7656 TAMIL, // 0BD7
7657 UNKNOWN, // 0BD8..0BE5
7658 TAMIL, // 0BE6..0BFA
7659 UNKNOWN, // 0BFB..0BFF
7660 TELUGU, // 0C00..0C0C
7661 UNKNOWN, // 0C0D
7662 TELUGU, // 0C0E..0C10
7663 UNKNOWN, // 0C11
7664 TELUGU, // 0C12..0C28
7665 UNKNOWN, // 0C29
7666 TELUGU, // 0C2A..0C39
7667 UNKNOWN, // 0C3A..0C3B
7668 TELUGU, // 0C3C..0C44
7669 UNKNOWN, // 0C45
7670 TELUGU, // 0C46..0C48
7671 UNKNOWN, // 0C49
7672 TELUGU, // 0C4A..0C4D
7673 UNKNOWN, // 0C4E..0C54
7674 TELUGU, // 0C55..0C56
7675 UNKNOWN, // 0C57
7676 TELUGU, // 0C58..0C5A
7677 UNKNOWN, // 0C5B
7678 TELUGU, // 0C5C..0C5D
7679 UNKNOWN, // 0C5E..0C5F
7680 TELUGU, // 0C60..0C63
7681 UNKNOWN, // 0C64..0C65
7682 TELUGU, // 0C66..0C6F
7683 UNKNOWN, // 0C70..0C76
7684 TELUGU, // 0C77..0C7F
7685 KANNADA, // 0C80..0C8C
7686 UNKNOWN, // 0C8D
7687 KANNADA, // 0C8E..0C90
7688 UNKNOWN, // 0C91
7689 KANNADA, // 0C92..0CA8
7690 UNKNOWN, // 0CA9
7691 KANNADA, // 0CAA..0CB3
7692 UNKNOWN, // 0CB4
7693 KANNADA, // 0CB5..0CB9
7694 UNKNOWN, // 0CBA..0CBB
7695 KANNADA, // 0CBC..0CC4
7696 UNKNOWN, // 0CC5
7697 KANNADA, // 0CC6..0CC8
7698 UNKNOWN, // 0CC9
7699 KANNADA, // 0CCA..0CCD
7700 UNKNOWN, // 0CCE..0CD4
7701 KANNADA, // 0CD5..0CD6
7702 UNKNOWN, // 0CD7..0CDB
7703 KANNADA, // 0CDC..0CDE
7704 UNKNOWN, // 0CDF
7705 KANNADA, // 0CE0..0CE3
7706 UNKNOWN, // 0CE4..0CE5
7707 KANNADA, // 0CE6..0CEF
7708 UNKNOWN, // 0CF0
7709 KANNADA, // 0CF1..0CF3
7710 UNKNOWN, // 0CF4..0CFF
7711 MALAYALAM, // 0D00..0D0C
7712 UNKNOWN, // 0D0D
7713 MALAYALAM, // 0D0E..0D10
7714 UNKNOWN, // 0D11
7715 MALAYALAM, // 0D12..0D44
7716 UNKNOWN, // 0D45
7717 MALAYALAM, // 0D46..0D48
7718 UNKNOWN, // 0D49
7719 MALAYALAM, // 0D4A..0D4F
7720 UNKNOWN, // 0D50..0D53
7721 MALAYALAM, // 0D54..0D63
7722 UNKNOWN, // 0D64..0D65
7723 MALAYALAM, // 0D66..0D7F
7724 UNKNOWN, // 0D80
7725 SINHALA, // 0D81..0D83
7726 UNKNOWN, // 0D84
7727 SINHALA, // 0D85..0D96
7728 UNKNOWN, // 0D97..0D99
7729 SINHALA, // 0D9A..0DB1
7730 UNKNOWN, // 0DB2
7731 SINHALA, // 0DB3..0DBB
7732 UNKNOWN, // 0DBC
7733 SINHALA, // 0DBD
7734 UNKNOWN, // 0DBE..0DBF
7735 SINHALA, // 0DC0..0DC6
7736 UNKNOWN, // 0DC7..0DC9
7737 SINHALA, // 0DCA
7738 UNKNOWN, // 0DCB..0DCE
7739 SINHALA, // 0DCF..0DD4
7740 UNKNOWN, // 0DD5
7741 SINHALA, // 0DD6
7742 UNKNOWN, // 0DD7
7743 SINHALA, // 0DD8..0DDF
7744 UNKNOWN, // 0DE0..0DE5
7745 SINHALA, // 0DE6..0DEF
7746 UNKNOWN, // 0DF0..0DF1
7747 SINHALA, // 0DF2..0DF4
7748 UNKNOWN, // 0DF5..0E00
7749 THAI, // 0E01..0E3A
7750 UNKNOWN, // 0E3B..0E3E
7751 COMMON, // 0E3F
7752 THAI, // 0E40..0E5B
7753 UNKNOWN, // 0E5C..0E80
7754 LAO, // 0E81..0E82
7755 UNKNOWN, // 0E83
7756 LAO, // 0E84
7757 UNKNOWN, // 0E85
7758 LAO, // 0E86..0E8A
7759 UNKNOWN, // 0E8B
7760 LAO, // 0E8C..0EA3
7761 UNKNOWN, // 0EA4
7762 LAO, // 0EA5
7763 UNKNOWN, // 0EA6
7764 LAO, // 0EA7..0EBD
7765 UNKNOWN, // 0EBE..0EBF
7766 LAO, // 0EC0..0EC4
7767 UNKNOWN, // 0EC5
7768 LAO, // 0EC6
7769 UNKNOWN, // 0EC7
7770 LAO, // 0EC8..0ECE
7771 UNKNOWN, // 0ECF
7772 LAO, // 0ED0..0ED9
7773 UNKNOWN, // 0EDA..0EDB
7774 LAO, // 0EDC..0EDF
7775 UNKNOWN, // 0EE0..0EFF
7776 TIBETAN, // 0F00..0F47
7777 UNKNOWN, // 0F48
7778 TIBETAN, // 0F49..0F6C
7779 UNKNOWN, // 0F6D..0F70
7780 TIBETAN, // 0F71..0F97
7781 UNKNOWN, // 0F98
7782 TIBETAN, // 0F99..0FBC
7783 UNKNOWN, // 0FBD
7784 TIBETAN, // 0FBE..0FCC
7785 UNKNOWN, // 0FCD
7786 TIBETAN, // 0FCE..0FD4
7787 COMMON, // 0FD5..0FD8
7788 TIBETAN, // 0FD9..0FDA
7789 UNKNOWN, // 0FDB..0FFF
7790 MYANMAR, // 1000..109F
7791 GEORGIAN, // 10A0..10C5
7792 UNKNOWN, // 10C6
7793 GEORGIAN, // 10C7
7794 UNKNOWN, // 10C8..10CC
7795 GEORGIAN, // 10CD
7796 UNKNOWN, // 10CE..10CF
7797 GEORGIAN, // 10D0..10FA
7798 COMMON, // 10FB
7799 GEORGIAN, // 10FC..10FF
7800 HANGUL, // 1100..11FF
7801 ETHIOPIC, // 1200..1248
7802 UNKNOWN, // 1249
7803 ETHIOPIC, // 124A..124D
7804 UNKNOWN, // 124E..124F
7805 ETHIOPIC, // 1250..1256
7806 UNKNOWN, // 1257
7807 ETHIOPIC, // 1258
7808 UNKNOWN, // 1259
7809 ETHIOPIC, // 125A..125D
7810 UNKNOWN, // 125E..125F
7811 ETHIOPIC, // 1260..1288
7812 UNKNOWN, // 1289
7813 ETHIOPIC, // 128A..128D
7814 UNKNOWN, // 128E..128F
7815 ETHIOPIC, // 1290..12B0
7816 UNKNOWN, // 12B1
7817 ETHIOPIC, // 12B2..12B5
7818 UNKNOWN, // 12B6..12B7
7819 ETHIOPIC, // 12B8..12BE
7820 UNKNOWN, // 12BF
7821 ETHIOPIC, // 12C0
7822 UNKNOWN, // 12C1
7823 ETHIOPIC, // 12C2..12C5
7824 UNKNOWN, // 12C6..12C7
7825 ETHIOPIC, // 12C8..12D6
7826 UNKNOWN, // 12D7
7827 ETHIOPIC, // 12D8..1310
7828 UNKNOWN, // 1311
7829 ETHIOPIC, // 1312..1315
7830 UNKNOWN, // 1316..1317
7831 ETHIOPIC, // 1318..135A
7832 UNKNOWN, // 135B..135C
7833 ETHIOPIC, // 135D..137C
7834 UNKNOWN, // 137D..137F
7835 ETHIOPIC, // 1380..1399
7836 UNKNOWN, // 139A..139F
7837 CHEROKEE, // 13A0..13F5
7838 UNKNOWN, // 13F6..13F7
7839 CHEROKEE, // 13F8..13FD
7840 UNKNOWN, // 13FE..13FF
7841 CANADIAN_ABORIGINAL, // 1400..167F
7842 OGHAM, // 1680..169C
7843 UNKNOWN, // 169D..169F
7844 RUNIC, // 16A0..16EA
7845 COMMON, // 16EB..16ED
7846 RUNIC, // 16EE..16F8
7847 UNKNOWN, // 16F9..16FF
7848 TAGALOG, // 1700..1715
7849 UNKNOWN, // 1716..171E
7850 TAGALOG, // 171F
7851 HANUNOO, // 1720..1734
7852 COMMON, // 1735..1736
7853 UNKNOWN, // 1737..173F
7854 BUHID, // 1740..1753
7855 UNKNOWN, // 1754..175F
7856 TAGBANWA, // 1760..176C
7857 UNKNOWN, // 176D
7858 TAGBANWA, // 176E..1770
7859 UNKNOWN, // 1771
7860 TAGBANWA, // 1772..1773
7861 UNKNOWN, // 1774..177F
7862 KHMER, // 1780..17DD
7863 UNKNOWN, // 17DE..17DF
7864 KHMER, // 17E0..17E9
7865 UNKNOWN, // 17EA..17EF
7866 KHMER, // 17F0..17F9
7867 UNKNOWN, // 17FA..17FF
7868 MONGOLIAN, // 1800..1801
7869 COMMON, // 1802..1803
7870 MONGOLIAN, // 1804
7871 COMMON, // 1805
7872 MONGOLIAN, // 1806..1819
7873 UNKNOWN, // 181A..181F
7874 MONGOLIAN, // 1820..1878
7875 UNKNOWN, // 1879..187F
7876 MONGOLIAN, // 1880..18AA
7877 UNKNOWN, // 18AB..18AF
7878 CANADIAN_ABORIGINAL, // 18B0..18F5
7879 UNKNOWN, // 18F6..18FF
7880 LIMBU, // 1900..191E
7881 UNKNOWN, // 191F
7882 LIMBU, // 1920..192B
7883 UNKNOWN, // 192C..192F
7884 LIMBU, // 1930..193B
7885 UNKNOWN, // 193C..193F
7886 LIMBU, // 1940
7887 UNKNOWN, // 1941..1943
7888 LIMBU, // 1944..194F
7889 TAI_LE, // 1950..196D
7890 UNKNOWN, // 196E..196F
7891 TAI_LE, // 1970..1974
7892 UNKNOWN, // 1975..197F
7893 NEW_TAI_LUE, // 1980..19AB
7894 UNKNOWN, // 19AC..19AF
7895 NEW_TAI_LUE, // 19B0..19C9
7896 UNKNOWN, // 19CA..19CF
7897 NEW_TAI_LUE, // 19D0..19DA
7898 UNKNOWN, // 19DB..19DD
7899 NEW_TAI_LUE, // 19DE..19DF
7900 KHMER, // 19E0..19FF
7901 BUGINESE, // 1A00..1A1B
7902 UNKNOWN, // 1A1C..1A1D
7903 BUGINESE, // 1A1E..1A1F
7904 TAI_THAM, // 1A20..1A5E
7905 UNKNOWN, // 1A5F
7906 TAI_THAM, // 1A60..1A7C
7907 UNKNOWN, // 1A7D..1A7E
7908 TAI_THAM, // 1A7F..1A89
7909 UNKNOWN, // 1A8A..1A8F
7910 TAI_THAM, // 1A90..1A99
7911 UNKNOWN, // 1A9A..1A9F
7912 TAI_THAM, // 1AA0..1AAD
7913 UNKNOWN, // 1AAE..1AAF
7914 INHERITED, // 1AB0..1ADD
7915 UNKNOWN, // 1ADE..1ADF
7916 INHERITED, // 1AE0..1AEB
7917 UNKNOWN, // 1AEC..1AFF
7918 BALINESE, // 1B00..1B4C
7919 UNKNOWN, // 1B4D
7920 BALINESE, // 1B4E..1B7F
7921 SUNDANESE, // 1B80..1BBF
7922 BATAK, // 1BC0..1BF3
7923 UNKNOWN, // 1BF4..1BFB
7924 BATAK, // 1BFC..1BFF
7925 LEPCHA, // 1C00..1C37
7926 UNKNOWN, // 1C38..1C3A
7927 LEPCHA, // 1C3B..1C49
7928 UNKNOWN, // 1C4A..1C4C
7929 LEPCHA, // 1C4D..1C4F
7930 OL_CHIKI, // 1C50..1C7F
7931 CYRILLIC, // 1C80..1C8A
7932 UNKNOWN, // 1C8B..1C8F
7933 GEORGIAN, // 1C90..1CBA
7934 UNKNOWN, // 1CBB..1CBC
7935 GEORGIAN, // 1CBD..1CBF
7936 SUNDANESE, // 1CC0..1CC7
7937 UNKNOWN, // 1CC8..1CCF
7938 INHERITED, // 1CD0..1CD2
7939 COMMON, // 1CD3
7940 INHERITED, // 1CD4..1CE0
7941 COMMON, // 1CE1
7942 INHERITED, // 1CE2..1CE8
7943 COMMON, // 1CE9..1CEC
7944 INHERITED, // 1CED
7945 COMMON, // 1CEE..1CF3
7946 INHERITED, // 1CF4
7947 COMMON, // 1CF5..1CF7
7948 INHERITED, // 1CF8..1CF9
7949 COMMON, // 1CFA
7950 UNKNOWN, // 1CFB..1CFF
7951 LATIN, // 1D00..1D25
7952 GREEK, // 1D26..1D2A
7953 CYRILLIC, // 1D2B
7954 LATIN, // 1D2C..1D5C
7955 GREEK, // 1D5D..1D61
7956 LATIN, // 1D62..1D65
7957 GREEK, // 1D66..1D6A
7958 LATIN, // 1D6B..1D77
7959 CYRILLIC, // 1D78
7960 LATIN, // 1D79..1DBE
7961 GREEK, // 1DBF
7962 INHERITED, // 1DC0..1DFF
7963 LATIN, // 1E00..1EFF
7964 GREEK, // 1F00..1F15
7965 UNKNOWN, // 1F16..1F17
7966 GREEK, // 1F18..1F1D
7967 UNKNOWN, // 1F1E..1F1F
7968 GREEK, // 1F20..1F45
7969 UNKNOWN, // 1F46..1F47
7970 GREEK, // 1F48..1F4D
7971 UNKNOWN, // 1F4E..1F4F
7972 GREEK, // 1F50..1F57
7973 UNKNOWN, // 1F58
7974 GREEK, // 1F59
7975 UNKNOWN, // 1F5A
7976 GREEK, // 1F5B
7977 UNKNOWN, // 1F5C
7978 GREEK, // 1F5D
7979 UNKNOWN, // 1F5E
7980 GREEK, // 1F5F..1F7D
7981 UNKNOWN, // 1F7E..1F7F
7982 GREEK, // 1F80..1FB4
7983 UNKNOWN, // 1FB5
7984 GREEK, // 1FB6..1FC4
7985 UNKNOWN, // 1FC5
7986 GREEK, // 1FC6..1FD3
7987 UNKNOWN, // 1FD4..1FD5
7988 GREEK, // 1FD6..1FDB
7989 UNKNOWN, // 1FDC
7990 GREEK, // 1FDD..1FEF
7991 UNKNOWN, // 1FF0..1FF1
7992 GREEK, // 1FF2..1FF4
7993 UNKNOWN, // 1FF5
7994 GREEK, // 1FF6..1FFE
7995 UNKNOWN, // 1FFF
7996 COMMON, // 2000..200B
7997 INHERITED, // 200C..200D
7998 COMMON, // 200E..2064
7999 UNKNOWN, // 2065
8000 COMMON, // 2066..2070
8001 LATIN, // 2071
8002 UNKNOWN, // 2072..2073
8003 COMMON, // 2074..207E
8004 LATIN, // 207F
8005 COMMON, // 2080..208E
8006 UNKNOWN, // 208F
8007 LATIN, // 2090..209C
8008 UNKNOWN, // 209D..209F
8009 COMMON, // 20A0..20C1
8010 UNKNOWN, // 20C2..20CF
8011 INHERITED, // 20D0..20F0
8012 UNKNOWN, // 20F1..20FF
8013 COMMON, // 2100..2125
8014 GREEK, // 2126
8015 COMMON, // 2127..2129
8016 LATIN, // 212A..212B
8017 COMMON, // 212C..2131
8018 LATIN, // 2132
8019 COMMON, // 2133..214D
8020 LATIN, // 214E
8021 COMMON, // 214F..215F
8022 LATIN, // 2160..2188
8023 COMMON, // 2189..218B
8024 UNKNOWN, // 218C..218F
8025 COMMON, // 2190..2429
8026 UNKNOWN, // 242A..243F
8027 COMMON, // 2440..244A
8028 UNKNOWN, // 244B..245F
8029 COMMON, // 2460..27FF
8030 BRAILLE, // 2800..28FF
8031 COMMON, // 2900..2B73
8032 UNKNOWN, // 2B74..2B75
8033 COMMON, // 2B76..2BFF
8034 GLAGOLITIC, // 2C00..2C5F
8035 LATIN, // 2C60..2C7F
8036 COPTIC, // 2C80..2CF3
8037 UNKNOWN, // 2CF4..2CF8
8038 COPTIC, // 2CF9..2CFF
8039 GEORGIAN, // 2D00..2D25
8040 UNKNOWN, // 2D26
8041 GEORGIAN, // 2D27
8042 UNKNOWN, // 2D28..2D2C
8043 GEORGIAN, // 2D2D
8044 UNKNOWN, // 2D2E..2D2F
8045 TIFINAGH, // 2D30..2D67
8046 UNKNOWN, // 2D68..2D6E
8047 TIFINAGH, // 2D6F..2D70
8048 UNKNOWN, // 2D71..2D7E
8049 TIFINAGH, // 2D7F
8050 ETHIOPIC, // 2D80..2D96
8051 UNKNOWN, // 2D97..2D9F
8052 ETHIOPIC, // 2DA0..2DA6
8053 UNKNOWN, // 2DA7
8054 ETHIOPIC, // 2DA8..2DAE
8055 UNKNOWN, // 2DAF
8056 ETHIOPIC, // 2DB0..2DB6
8057 UNKNOWN, // 2DB7
8058 ETHIOPIC, // 2DB8..2DBE
8059 UNKNOWN, // 2DBF
8060 ETHIOPIC, // 2DC0..2DC6
8061 UNKNOWN, // 2DC7
8062 ETHIOPIC, // 2DC8..2DCE
8063 UNKNOWN, // 2DCF
8064 ETHIOPIC, // 2DD0..2DD6
8065 UNKNOWN, // 2DD7
8066 ETHIOPIC, // 2DD8..2DDE
8067 UNKNOWN, // 2DDF
8068 CYRILLIC, // 2DE0..2DFF
8069 COMMON, // 2E00..2E5D
8070 UNKNOWN, // 2E5E..2E7F
8071 HAN, // 2E80..2E99
8072 UNKNOWN, // 2E9A
8073 HAN, // 2E9B..2EF3
8074 UNKNOWN, // 2EF4..2EFF
8075 HAN, // 2F00..2FD5
8076 UNKNOWN, // 2FD6..2FEF
8077 COMMON, // 2FF0..3004
8078 HAN, // 3005
8079 COMMON, // 3006
8080 HAN, // 3007
8081 COMMON, // 3008..3020
8082 HAN, // 3021..3029
8083 INHERITED, // 302A..302D
8084 HANGUL, // 302E..302F
8085 COMMON, // 3030..3037
8086 HAN, // 3038..303B
8087 COMMON, // 303C..303F
8088 UNKNOWN, // 3040
8089 HIRAGANA, // 3041..3096
8090 UNKNOWN, // 3097..3098
8091 INHERITED, // 3099..309A
8092 COMMON, // 309B..309C
8093 HIRAGANA, // 309D..309F
8094 COMMON, // 30A0
8095 KATAKANA, // 30A1..30FA
8096 COMMON, // 30FB..30FC
8097 KATAKANA, // 30FD..30FF
8098 UNKNOWN, // 3100..3104
8099 BOPOMOFO, // 3105..312F
8100 UNKNOWN, // 3130
8101 HANGUL, // 3131..318E
8102 UNKNOWN, // 318F
8103 COMMON, // 3190..319F
8104 BOPOMOFO, // 31A0..31BF
8105 COMMON, // 31C0..31E5
8106 UNKNOWN, // 31E6..31EE
8107 COMMON, // 31EF
8108 KATAKANA, // 31F0..31FF
8109 HANGUL, // 3200..321E
8110 UNKNOWN, // 321F
8111 COMMON, // 3220..325F
8112 HANGUL, // 3260..327E
8113 COMMON, // 327F..32CF
8114 KATAKANA, // 32D0..32FE
8115 COMMON, // 32FF
8116 KATAKANA, // 3300..3357
8117 COMMON, // 3358..33FF
8118 HAN, // 3400..4DBF
8119 COMMON, // 4DC0..4DFF
8120 HAN, // 4E00..9FFF
8121 YI, // A000..A48C
8122 UNKNOWN, // A48D..A48F
8123 YI, // A490..A4C6
8124 UNKNOWN, // A4C7..A4CF
8125 LISU, // A4D0..A4FF
8126 VAI, // A500..A62B
8127 UNKNOWN, // A62C..A63F
8128 CYRILLIC, // A640..A69F
8129 BAMUM, // A6A0..A6F7
8130 UNKNOWN, // A6F8..A6FF
8131 COMMON, // A700..A721
8132 LATIN, // A722..A787
8133 COMMON, // A788..A78A
8134 LATIN, // A78B..A7DC
8135 UNKNOWN, // A7DD..A7F0
8136 LATIN, // A7F1..A7FF
8137 SYLOTI_NAGRI, // A800..A82C
8138 UNKNOWN, // A82D..A82F
8139 COMMON, // A830..A839
8140 UNKNOWN, // A83A..A83F
8141 PHAGS_PA, // A840..A877
8142 UNKNOWN, // A878..A87F
8143 SAURASHTRA, // A880..A8C5
8144 UNKNOWN, // A8C6..A8CD
8145 SAURASHTRA, // A8CE..A8D9
8146 UNKNOWN, // A8DA..A8DF
8147 DEVANAGARI, // A8E0..A8FF
8148 KAYAH_LI, // A900..A92D
8149 COMMON, // A92E
8150 KAYAH_LI, // A92F
8151 REJANG, // A930..A953
8152 UNKNOWN, // A954..A95E
8153 REJANG, // A95F
8154 HANGUL, // A960..A97C
8155 UNKNOWN, // A97D..A97F
8156 JAVANESE, // A980..A9CD
8157 UNKNOWN, // A9CE
8158 COMMON, // A9CF
8159 JAVANESE, // A9D0..A9D9
8160 UNKNOWN, // A9DA..A9DD
8161 JAVANESE, // A9DE..A9DF
8162 MYANMAR, // A9E0..A9FE
8163 UNKNOWN, // A9FF
8164 CHAM, // AA00..AA36
8165 UNKNOWN, // AA37..AA3F
8166 CHAM, // AA40..AA4D
8167 UNKNOWN, // AA4E..AA4F
8168 CHAM, // AA50..AA59
8169 UNKNOWN, // AA5A..AA5B
8170 CHAM, // AA5C..AA5F
8171 MYANMAR, // AA60..AA7F
8172 TAI_VIET, // AA80..AAC2
8173 UNKNOWN, // AAC3..AADA
8174 TAI_VIET, // AADB..AADF
8175 MEETEI_MAYEK, // AAE0..AAF6
8176 UNKNOWN, // AAF7..AB00
8177 ETHIOPIC, // AB01..AB06
8178 UNKNOWN, // AB07..AB08
8179 ETHIOPIC, // AB09..AB0E
8180 UNKNOWN, // AB0F..AB10
8181 ETHIOPIC, // AB11..AB16
8182 UNKNOWN, // AB17..AB1F
8183 ETHIOPIC, // AB20..AB26
8184 UNKNOWN, // AB27
8185 ETHIOPIC, // AB28..AB2E
8186 UNKNOWN, // AB2F
8187 LATIN, // AB30..AB5A
8188 COMMON, // AB5B
8189 LATIN, // AB5C..AB64
8190 GREEK, // AB65
8191 LATIN, // AB66..AB69
8192 COMMON, // AB6A..AB6B
8193 UNKNOWN, // AB6C..AB6F
8194 CHEROKEE, // AB70..ABBF
8195 MEETEI_MAYEK, // ABC0..ABED
8196 UNKNOWN, // ABEE..ABEF
8197 MEETEI_MAYEK, // ABF0..ABF9
8198 UNKNOWN, // ABFA..ABFF
8199 HANGUL, // AC00..D7A3
8200 UNKNOWN, // D7A4..D7AF
8201 HANGUL, // D7B0..D7C6
8202 UNKNOWN, // D7C7..D7CA
8203 HANGUL, // D7CB..D7FB
8204 UNKNOWN, // D7FC..F8FF
8205 HAN, // F900..FA6D
8206 UNKNOWN, // FA6E..FA6F
8207 HAN, // FA70..FAD9
8208 UNKNOWN, // FADA..FAFF
8209 LATIN, // FB00..FB06
8210 UNKNOWN, // FB07..FB12
8211 ARMENIAN, // FB13..FB17
8212 UNKNOWN, // FB18..FB1C
8213 HEBREW, // FB1D..FB36
8214 UNKNOWN, // FB37
8215 HEBREW, // FB38..FB3C
8216 UNKNOWN, // FB3D
8217 HEBREW, // FB3E
8218 UNKNOWN, // FB3F
8219 HEBREW, // FB40..FB41
8220 UNKNOWN, // FB42
8221 HEBREW, // FB43..FB44
8222 UNKNOWN, // FB45
8223 HEBREW, // FB46..FB4F
8224 ARABIC, // FB50..FD3D
8225 COMMON, // FD3E..FD3F
8226 ARABIC, // FD40..FDCF
8227 UNKNOWN, // FDD0..FDEF
8228 ARABIC, // FDF0..FDFF
8229 INHERITED, // FE00..FE0F
8230 COMMON, // FE10..FE19
8231 UNKNOWN, // FE1A..FE1F
8232 INHERITED, // FE20..FE2D
8233 CYRILLIC, // FE2E..FE2F
8234 COMMON, // FE30..FE52
8235 UNKNOWN, // FE53
8236 COMMON, // FE54..FE66
8237 UNKNOWN, // FE67
8238 COMMON, // FE68..FE6B
8239 UNKNOWN, // FE6C..FE6F
8240 ARABIC, // FE70..FE74
8241 UNKNOWN, // FE75
8242 ARABIC, // FE76..FEFC
8243 UNKNOWN, // FEFD..FEFE
8244 COMMON, // FEFF
8245 UNKNOWN, // FF00
8246 COMMON, // FF01..FF20
8247 LATIN, // FF21..FF3A
8248 COMMON, // FF3B..FF40
8249 LATIN, // FF41..FF5A
8250 COMMON, // FF5B..FF65
8251 KATAKANA, // FF66..FF6F
8252 COMMON, // FF70
8253 KATAKANA, // FF71..FF9D
8254 COMMON, // FF9E..FF9F
8255 HANGUL, // FFA0..FFBE
8256 UNKNOWN, // FFBF..FFC1
8257 HANGUL, // FFC2..FFC7
8258 UNKNOWN, // FFC8..FFC9
8259 HANGUL, // FFCA..FFCF
8260 UNKNOWN, // FFD0..FFD1
8261 HANGUL, // FFD2..FFD7
8262 UNKNOWN, // FFD8..FFD9
8263 HANGUL, // FFDA..FFDC
8264 UNKNOWN, // FFDD..FFDF
8265 COMMON, // FFE0..FFE6
8266 UNKNOWN, // FFE7
8267 COMMON, // FFE8..FFEE
8268 UNKNOWN, // FFEF..FFF8
8269 COMMON, // FFF9..FFFD
8270 UNKNOWN, // FFFE..FFFF
8271 LINEAR_B, // 10000..1000B
8272 UNKNOWN, // 1000C
8273 LINEAR_B, // 1000D..10026
8274 UNKNOWN, // 10027
8275 LINEAR_B, // 10028..1003A
8276 UNKNOWN, // 1003B
8277 LINEAR_B, // 1003C..1003D
8278 UNKNOWN, // 1003E
8279 LINEAR_B, // 1003F..1004D
8280 UNKNOWN, // 1004E..1004F
8281 LINEAR_B, // 10050..1005D
8282 UNKNOWN, // 1005E..1007F
8283 LINEAR_B, // 10080..100FA
8284 UNKNOWN, // 100FB..100FF
8285 COMMON, // 10100..10102
8286 UNKNOWN, // 10103..10106
8287 COMMON, // 10107..10133
8288 UNKNOWN, // 10134..10136
8289 COMMON, // 10137..1013F
8290 GREEK, // 10140..1018E
8291 UNKNOWN, // 1018F
8292 COMMON, // 10190..1019C
8293 UNKNOWN, // 1019D..1019F
8294 GREEK, // 101A0
8295 UNKNOWN, // 101A1..101CF
8296 COMMON, // 101D0..101FC
8297 INHERITED, // 101FD
8298 UNKNOWN, // 101FE..1027F
8299 LYCIAN, // 10280..1029C
8300 UNKNOWN, // 1029D..1029F
8301 CARIAN, // 102A0..102D0
8302 UNKNOWN, // 102D1..102DF
8303 INHERITED, // 102E0
8304 COMMON, // 102E1..102FB
8305 UNKNOWN, // 102FC..102FF
8306 OLD_ITALIC, // 10300..10323
8307 UNKNOWN, // 10324..1032C
8308 OLD_ITALIC, // 1032D..1032F
8309 GOTHIC, // 10330..1034A
8310 UNKNOWN, // 1034B..1034F
8311 OLD_PERMIC, // 10350..1037A
8312 UNKNOWN, // 1037B..1037F
8313 UGARITIC, // 10380..1039D
8314 UNKNOWN, // 1039E
8315 UGARITIC, // 1039F
8316 OLD_PERSIAN, // 103A0..103C3
8317 UNKNOWN, // 103C4..103C7
8318 OLD_PERSIAN, // 103C8..103D5
8319 UNKNOWN, // 103D6..103FF
8320 DESERET, // 10400..1044F
8321 SHAVIAN, // 10450..1047F
8322 OSMANYA, // 10480..1049D
8323 UNKNOWN, // 1049E..1049F
8324 OSMANYA, // 104A0..104A9
8325 UNKNOWN, // 104AA..104AF
8326 OSAGE, // 104B0..104D3
8327 UNKNOWN, // 104D4..104D7
8328 OSAGE, // 104D8..104FB
8329 UNKNOWN, // 104FC..104FF
8330 ELBASAN, // 10500..10527
8331 UNKNOWN, // 10528..1052F
8332 CAUCASIAN_ALBANIAN, // 10530..10563
8333 UNKNOWN, // 10564..1056E
8334 CAUCASIAN_ALBANIAN, // 1056F
8335 VITHKUQI, // 10570..1057A
8336 UNKNOWN, // 1057B
8337 VITHKUQI, // 1057C..1058A
8338 UNKNOWN, // 1058B
8339 VITHKUQI, // 1058C..10592
8340 UNKNOWN, // 10593
8341 VITHKUQI, // 10594..10595
8342 UNKNOWN, // 10596
8343 VITHKUQI, // 10597..105A1
8344 UNKNOWN, // 105A2
8345 VITHKUQI, // 105A3..105B1
8346 UNKNOWN, // 105B2
8347 VITHKUQI, // 105B3..105B9
8348 UNKNOWN, // 105BA
8349 VITHKUQI, // 105BB..105BC
8350 UNKNOWN, // 105BD..105BF
8351 TODHRI, // 105C0..105F3
8352 UNKNOWN, // 105F4..105FF
8353 LINEAR_A, // 10600..10736
8354 UNKNOWN, // 10737..1073F
8355 LINEAR_A, // 10740..10755
8356 UNKNOWN, // 10756..1075F
8357 LINEAR_A, // 10760..10767
8358 UNKNOWN, // 10768..1077F
8359 LATIN, // 10780..10785
8360 UNKNOWN, // 10786
8361 LATIN, // 10787..107B0
8362 UNKNOWN, // 107B1
8363 LATIN, // 107B2..107BA
8364 UNKNOWN, // 107BB..107FF
8365 CYPRIOT, // 10800..10805
8366 UNKNOWN, // 10806..10807
8367 CYPRIOT, // 10808
8368 UNKNOWN, // 10809
8369 CYPRIOT, // 1080A..10835
8370 UNKNOWN, // 10836
8371 CYPRIOT, // 10837..10838
8372 UNKNOWN, // 10839..1083B
8373 CYPRIOT, // 1083C
8374 UNKNOWN, // 1083D..1083E
8375 CYPRIOT, // 1083F
8376 IMPERIAL_ARAMAIC, // 10840..10855
8377 UNKNOWN, // 10856
8378 IMPERIAL_ARAMAIC, // 10857..1085F
8379 PALMYRENE, // 10860..1087F
8380 NABATAEAN, // 10880..1089E
8381 UNKNOWN, // 1089F..108A6
8382 NABATAEAN, // 108A7..108AF
8383 UNKNOWN, // 108B0..108DF
8384 HATRAN, // 108E0..108F2
8385 UNKNOWN, // 108F3
8386 HATRAN, // 108F4..108F5
8387 UNKNOWN, // 108F6..108FA
8388 HATRAN, // 108FB..108FF
8389 PHOENICIAN, // 10900..1091B
8390 UNKNOWN, // 1091C..1091E
8391 PHOENICIAN, // 1091F
8392 LYDIAN, // 10920..10939
8393 UNKNOWN, // 1093A..1093E
8394 LYDIAN, // 1093F
8395 SIDETIC, // 10940..10959
8396 UNKNOWN, // 1095A..1097F
8397 MEROITIC_HIEROGLYPHS, // 10980..1099F
8398 MEROITIC_CURSIVE, // 109A0..109B7
8399 UNKNOWN, // 109B8..109BB
8400 MEROITIC_CURSIVE, // 109BC..109CF
8401 UNKNOWN, // 109D0..109D1
8402 MEROITIC_CURSIVE, // 109D2..109FF
8403 KHAROSHTHI, // 10A00..10A03
8404 UNKNOWN, // 10A04
8405 KHAROSHTHI, // 10A05..10A06
8406 UNKNOWN, // 10A07..10A0B
8407 KHAROSHTHI, // 10A0C..10A13
8408 UNKNOWN, // 10A14
8409 KHAROSHTHI, // 10A15..10A17
8410 UNKNOWN, // 10A18
8411 KHAROSHTHI, // 10A19..10A35
8412 UNKNOWN, // 10A36..10A37
8413 KHAROSHTHI, // 10A38..10A3A
8414 UNKNOWN, // 10A3B..10A3E
8415 KHAROSHTHI, // 10A3F..10A48
8416 UNKNOWN, // 10A49..10A4F
8417 KHAROSHTHI, // 10A50..10A58
8418 UNKNOWN, // 10A59..10A5F
8419 OLD_SOUTH_ARABIAN, // 10A60..10A7F
8420 OLD_NORTH_ARABIAN, // 10A80..10A9F
8421 UNKNOWN, // 10AA0..10ABF
8422 MANICHAEAN, // 10AC0..10AE6
8423 UNKNOWN, // 10AE7..10AEA
8424 MANICHAEAN, // 10AEB..10AF6
8425 UNKNOWN, // 10AF7..10AFF
8426 AVESTAN, // 10B00..10B35
8427 UNKNOWN, // 10B36..10B38
8428 AVESTAN, // 10B39..10B3F
8429 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55
8430 UNKNOWN, // 10B56..10B57
8431 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F
8432 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72
8433 UNKNOWN, // 10B73..10B77
8434 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F
8435 PSALTER_PAHLAVI, // 10B80..10B91
8436 UNKNOWN, // 10B92..10B98
8437 PSALTER_PAHLAVI, // 10B99..10B9C
8438 UNKNOWN, // 10B9D..10BA8
8439 PSALTER_PAHLAVI, // 10BA9..10BAF
8440 UNKNOWN, // 10BB0..10BFF
8441 OLD_TURKIC, // 10C00..10C48
8442 UNKNOWN, // 10C49..10C7F
8443 OLD_HUNGARIAN, // 10C80..10CB2
8444 UNKNOWN, // 10CB3..10CBF
8445 OLD_HUNGARIAN, // 10CC0..10CF2
8446 UNKNOWN, // 10CF3..10CF9
8447 OLD_HUNGARIAN, // 10CFA..10CFF
8448 HANIFI_ROHINGYA, // 10D00..10D27
8449 UNKNOWN, // 10D28..10D2F
8450 HANIFI_ROHINGYA, // 10D30..10D39
8451 UNKNOWN, // 10D3A..10D3F
8452 GARAY, // 10D40..10D65
8453 UNKNOWN, // 10D66..10D68
8454 GARAY, // 10D69..10D85
8455 UNKNOWN, // 10D86..10D8D
8456 GARAY, // 10D8E..10D8F
8457 UNKNOWN, // 10D90..10E5F
8458 ARABIC, // 10E60..10E7E
8459 UNKNOWN, // 10E7F
8460 YEZIDI, // 10E80..10EA9
8461 UNKNOWN, // 10EAA
8462 YEZIDI, // 10EAB..10EAD
8463 UNKNOWN, // 10EAE..10EAF
8464 YEZIDI, // 10EB0..10EB1
8465 UNKNOWN, // 10EB2..10EC1
8466 ARABIC, // 10EC2..10EC7
8467 UNKNOWN, // 10EC8..10ECF
8468 ARABIC, // 10ED0..10ED8
8469 UNKNOWN, // 10ED9..10EF9
8470 ARABIC, // 10EFA..10EFF
8471 OLD_SOGDIAN, // 10F00..10F27
8472 UNKNOWN, // 10F28..10F2F
8473 SOGDIAN, // 10F30..10F59
8474 UNKNOWN, // 10F5A..10F6F
8475 OLD_UYGHUR, // 10F70..10F89
8476 UNKNOWN, // 10F8A..10FAF
8477 CHORASMIAN, // 10FB0..10FCB
8478 UNKNOWN, // 10FCC..10FDF
8479 ELYMAIC, // 10FE0..10FF6
8480 UNKNOWN, // 10FF7..10FFF
8481 BRAHMI, // 11000..1104D
8482 UNKNOWN, // 1104E..11051
8483 BRAHMI, // 11052..11075
8484 UNKNOWN, // 11076..1107E
8485 BRAHMI, // 1107F
8486 KAITHI, // 11080..110C2
8487 UNKNOWN, // 110C3..110CC
8488 KAITHI, // 110CD
8489 UNKNOWN, // 110CE..110CF
8490 SORA_SOMPENG, // 110D0..110E8
8491 UNKNOWN, // 110E9..110EF
8492 SORA_SOMPENG, // 110F0..110F9
8493 UNKNOWN, // 110FA..110FF
8494 CHAKMA, // 11100..11134
8495 UNKNOWN, // 11135
8496 CHAKMA, // 11136..11147
8497 UNKNOWN, // 11148..1114F
8498 MAHAJANI, // 11150..11176
8499 UNKNOWN, // 11177..1117F
8500 SHARADA, // 11180..111DF
8501 UNKNOWN, // 111E0
8502 SINHALA, // 111E1..111F4
8503 UNKNOWN, // 111F5..111FF
8504 KHOJKI, // 11200..11211
8505 UNKNOWN, // 11212
8506 KHOJKI, // 11213..11241
8507 UNKNOWN, // 11242..1127F
8508 MULTANI, // 11280..11286
8509 UNKNOWN, // 11287
8510 MULTANI, // 11288
8511 UNKNOWN, // 11289
8512 MULTANI, // 1128A..1128D
8513 UNKNOWN, // 1128E
8514 MULTANI, // 1128F..1129D
8515 UNKNOWN, // 1129E
8516 MULTANI, // 1129F..112A9
8517 UNKNOWN, // 112AA..112AF
8518 KHUDAWADI, // 112B0..112EA
8519 UNKNOWN, // 112EB..112EF
8520 KHUDAWADI, // 112F0..112F9
8521 UNKNOWN, // 112FA..112FF
8522 GRANTHA, // 11300..11303
8523 UNKNOWN, // 11304
8524 GRANTHA, // 11305..1130C
8525 UNKNOWN, // 1130D..1130E
8526 GRANTHA, // 1130F..11310
8527 UNKNOWN, // 11311..11312
8528 GRANTHA, // 11313..11328
8529 UNKNOWN, // 11329
8530 GRANTHA, // 1132A..11330
8531 UNKNOWN, // 11331
8532 GRANTHA, // 11332..11333
8533 UNKNOWN, // 11334
8534 GRANTHA, // 11335..11339
8535 UNKNOWN, // 1133A
8536 INHERITED, // 1133B
8537 GRANTHA, // 1133C..11344
8538 UNKNOWN, // 11345..11346
8539 GRANTHA, // 11347..11348
8540 UNKNOWN, // 11349..1134A
8541 GRANTHA, // 1134B..1134D
8542 UNKNOWN, // 1134E..1134F
8543 GRANTHA, // 11350
8544 UNKNOWN, // 11351..11356
8545 GRANTHA, // 11357
8546 UNKNOWN, // 11358..1135C
8547 GRANTHA, // 1135D..11363
8548 UNKNOWN, // 11364..11365
8549 GRANTHA, // 11366..1136C
8550 UNKNOWN, // 1136D..1136F
8551 GRANTHA, // 11370..11374
8552 UNKNOWN, // 11375..1137F
8553 TULU_TIGALARI, // 11380..11389
8554 UNKNOWN, // 1138A
8555 TULU_TIGALARI, // 1138B
8556 UNKNOWN, // 1138C..1138D
8557 TULU_TIGALARI, // 1138E
8558 UNKNOWN, // 1138F
8559 TULU_TIGALARI, // 11390..113B5
8560 UNKNOWN, // 113B6
8561 TULU_TIGALARI, // 113B7..113C0
8562 UNKNOWN, // 113C1
8563 TULU_TIGALARI, // 113C2
8564 UNKNOWN, // 113C3..113C4
8565 TULU_TIGALARI, // 113C5
8566 UNKNOWN, // 113C6
8567 TULU_TIGALARI, // 113C7..113CA
8568 UNKNOWN, // 113CB
8569 TULU_TIGALARI, // 113CC..113D5
8570 UNKNOWN, // 113D6
8571 TULU_TIGALARI, // 113D7..113D8
8572 UNKNOWN, // 113D9..113E0
8573 TULU_TIGALARI, // 113E1..113E2
8574 UNKNOWN, // 113E3..113FF
8575 NEWA, // 11400..1145B
8576 UNKNOWN, // 1145C
8577 NEWA, // 1145D..11461
8578 UNKNOWN, // 11462..1147F
8579 TIRHUTA, // 11480..114C7
8580 UNKNOWN, // 114C8..114CF
8581 TIRHUTA, // 114D0..114D9
8582 UNKNOWN, // 114DA..1157F
8583 SIDDHAM, // 11580..115B5
8584 UNKNOWN, // 115B6..115B7
8585 SIDDHAM, // 115B8..115DD
8586 UNKNOWN, // 115DE..115FF
8587 MODI, // 11600..11644
8588 UNKNOWN, // 11645..1164F
8589 MODI, // 11650..11659
8590 UNKNOWN, // 1165A..1165F
8591 MONGOLIAN, // 11660..1166C
8592 UNKNOWN, // 1166D..1167F
8593 TAKRI, // 11680..116B9
8594 UNKNOWN, // 116BA..116BF
8595 TAKRI, // 116C0..116C9
8596 UNKNOWN, // 116CA..116CF
8597 MYANMAR, // 116D0..116E3
8598 UNKNOWN, // 116E4..116FF
8599 AHOM, // 11700..1171A
8600 UNKNOWN, // 1171B..1171C
8601 AHOM, // 1171D..1172B
8602 UNKNOWN, // 1172C..1172F
8603 AHOM, // 11730..11746
8604 UNKNOWN, // 11747..117FF
8605 DOGRA, // 11800..1183B
8606 UNKNOWN, // 1183C..1189F
8607 WARANG_CITI, // 118A0..118F2
8608 UNKNOWN, // 118F3..118FE
8609 WARANG_CITI, // 118FF
8610 DIVES_AKURU, // 11900..11906
8611 UNKNOWN, // 11907..11908
8612 DIVES_AKURU, // 11909
8613 UNKNOWN, // 1190A..1190B
8614 DIVES_AKURU, // 1190C..11913
8615 UNKNOWN, // 11914
8616 DIVES_AKURU, // 11915..11916
8617 UNKNOWN, // 11917
8618 DIVES_AKURU, // 11918..11935
8619 UNKNOWN, // 11936
8620 DIVES_AKURU, // 11937..11938
8621 UNKNOWN, // 11939..1193A
8622 DIVES_AKURU, // 1193B..11946
8623 UNKNOWN, // 11947..1194F
8624 DIVES_AKURU, // 11950..11959
8625 UNKNOWN, // 1195A..1199F
8626 NANDINAGARI, // 119A0..119A7
8627 UNKNOWN, // 119A8..119A9
8628 NANDINAGARI, // 119AA..119D7
8629 UNKNOWN, // 119D8..119D9
8630 NANDINAGARI, // 119DA..119E4
8631 UNKNOWN, // 119E5..119FF
8632 ZANABAZAR_SQUARE, // 11A00..11A47
8633 UNKNOWN, // 11A48..11A4F
8634 SOYOMBO, // 11A50..11AA2
8635 UNKNOWN, // 11AA3..11AAF
8636 CANADIAN_ABORIGINAL, // 11AB0..11ABF
8637 PAU_CIN_HAU, // 11AC0..11AF8
8638 UNKNOWN, // 11AF9..11AFF
8639 DEVANAGARI, // 11B00..11B09
8640 UNKNOWN, // 11B0A..11B5F
8641 SHARADA, // 11B60..11B67
8642 UNKNOWN, // 11B68..11BBF
8643 SUNUWAR, // 11BC0..11BE1
8644 UNKNOWN, // 11BE2..11BEF
8645 SUNUWAR, // 11BF0..11BF9
8646 UNKNOWN, // 11BFA..11BFF
8647 BHAIKSUKI, // 11C00..11C08
8648 UNKNOWN, // 11C09
8649 BHAIKSUKI, // 11C0A..11C36
8650 UNKNOWN, // 11C37
8651 BHAIKSUKI, // 11C38..11C45
8652 UNKNOWN, // 11C46..11C4F
8653 BHAIKSUKI, // 11C50..11C6C
8654 UNKNOWN, // 11C6D..11C6F
8655 MARCHEN, // 11C70..11C8F
8656 UNKNOWN, // 11C90..11C91
8657 MARCHEN, // 11C92..11CA7
8658 UNKNOWN, // 11CA8
8659 MARCHEN, // 11CA9..11CB6
8660 UNKNOWN, // 11CB7..11CFF
8661 MASARAM_GONDI, // 11D00..11D06
8662 UNKNOWN, // 11D07
8663 MASARAM_GONDI, // 11D08..11D09
8664 UNKNOWN, // 11D0A
8665 MASARAM_GONDI, // 11D0B..11D36
8666 UNKNOWN, // 11D37..11D39
8667 MASARAM_GONDI, // 11D3A
8668 UNKNOWN, // 11D3B
8669 MASARAM_GONDI, // 11D3C..11D3D
8670 UNKNOWN, // 11D3E
8671 MASARAM_GONDI, // 11D3F..11D47
8672 UNKNOWN, // 11D48..11D4F
8673 MASARAM_GONDI, // 11D50..11D59
8674 UNKNOWN, // 11D5A..11D5F
8675 GUNJALA_GONDI, // 11D60..11D65
8676 UNKNOWN, // 11D66
8677 GUNJALA_GONDI, // 11D67..11D68
8678 UNKNOWN, // 11D69
8679 GUNJALA_GONDI, // 11D6A..11D8E
8680 UNKNOWN, // 11D8F
8681 GUNJALA_GONDI, // 11D90..11D91
8682 UNKNOWN, // 11D92
8683 GUNJALA_GONDI, // 11D93..11D98
8684 UNKNOWN, // 11D99..11D9F
8685 GUNJALA_GONDI, // 11DA0..11DA9
8686 UNKNOWN, // 11DAA..11DAF
8687 TOLONG_SIKI, // 11DB0..11DDB
8688 UNKNOWN, // 11DDC..11DDF
8689 TOLONG_SIKI, // 11DE0..11DE9
8690 UNKNOWN, // 11DEA..11EDF
8691 MAKASAR, // 11EE0..11EF8
8692 UNKNOWN, // 11EF9..11EFF
8693 KAWI, // 11F00..11F10
8694 UNKNOWN, // 11F11
8695 KAWI, // 11F12..11F3A
8696 UNKNOWN, // 11F3B..11F3D
8697 KAWI, // 11F3E..11F5A
8698 UNKNOWN, // 11F5B..11FAF
8699 LISU, // 11FB0
8700 UNKNOWN, // 11FB1..11FBF
8701 TAMIL, // 11FC0..11FF1
8702 UNKNOWN, // 11FF2..11FFE
8703 TAMIL, // 11FFF
8704 CUNEIFORM, // 12000..12399
8705 UNKNOWN, // 1239A..123FF
8706 CUNEIFORM, // 12400..1246E
8707 UNKNOWN, // 1246F
8708 CUNEIFORM, // 12470..12474
8709 UNKNOWN, // 12475..1247F
8710 CUNEIFORM, // 12480..12543
8711 UNKNOWN, // 12544..12F8F
8712 CYPRO_MINOAN, // 12F90..12FF2
8713 UNKNOWN, // 12FF3..12FFF
8714 EGYPTIAN_HIEROGLYPHS, // 13000..13455
8715 UNKNOWN, // 13456..1345F
8716 EGYPTIAN_HIEROGLYPHS, // 13460..143FA
8717 UNKNOWN, // 143FB..143FF
8718 ANATOLIAN_HIEROGLYPHS, // 14400..14646
8719 UNKNOWN, // 14647..160FF
8720 GURUNG_KHEMA, // 16100..16139
8721 UNKNOWN, // 1613A..167FF
8722 BAMUM, // 16800..16A38
8723 UNKNOWN, // 16A39..16A3F
8724 MRO, // 16A40..16A5E
8725 UNKNOWN, // 16A5F
8726 MRO, // 16A60..16A69
8727 UNKNOWN, // 16A6A..16A6D
8728 MRO, // 16A6E..16A6F
8729 TANGSA, // 16A70..16ABE
8730 UNKNOWN, // 16ABF
8731 TANGSA, // 16AC0..16AC9
8732 UNKNOWN, // 16ACA..16ACF
8733 BASSA_VAH, // 16AD0..16AED
8734 UNKNOWN, // 16AEE..16AEF
8735 BASSA_VAH, // 16AF0..16AF5
8736 UNKNOWN, // 16AF6..16AFF
8737 PAHAWH_HMONG, // 16B00..16B45
8738 UNKNOWN, // 16B46..16B4F
8739 PAHAWH_HMONG, // 16B50..16B59
8740 UNKNOWN, // 16B5A
8741 PAHAWH_HMONG, // 16B5B..16B61
8742 UNKNOWN, // 16B62
8743 PAHAWH_HMONG, // 16B63..16B77
8744 UNKNOWN, // 16B78..16B7C
8745 PAHAWH_HMONG, // 16B7D..16B8F
8746 UNKNOWN, // 16B90..16D3F
8747 KIRAT_RAI, // 16D40..16D79
8748 UNKNOWN, // 16D7A..16E3F
8749 MEDEFAIDRIN, // 16E40..16E9A
8750 UNKNOWN, // 16E9B..16E9F
8751 BERIA_ERFE, // 16EA0..16EB8
8752 UNKNOWN, // 16EB9..16EBA
8753 BERIA_ERFE, // 16EBB..16ED3
8754 UNKNOWN, // 16ED4..16EFF
8755 MIAO, // 16F00..16F4A
8756 UNKNOWN, // 16F4B..16F4E
8757 MIAO, // 16F4F..16F87
8758 UNKNOWN, // 16F88..16F8E
8759 MIAO, // 16F8F..16F9F
8760 UNKNOWN, // 16FA0..16FDF
8761 TANGUT, // 16FE0
8762 NUSHU, // 16FE1
8763 HAN, // 16FE2..16FE3
8764 KHITAN_SMALL_SCRIPT, // 16FE4
8765 UNKNOWN, // 16FE5..16FEF
8766 HAN, // 16FF0..16FF6
8767 UNKNOWN, // 16FF7..16FFF
8768 TANGUT, // 17000..18AFF
8769 KHITAN_SMALL_SCRIPT, // 18B00..18CD5
8770 UNKNOWN, // 18CD6..18CFE
8771 KHITAN_SMALL_SCRIPT, // 18CFF
8772 TANGUT, // 18D00..18D1E
8773 UNKNOWN, // 18D1F..18D7F
8774 TANGUT, // 18D80..18DF2
8775 UNKNOWN, // 18DF3..1AFEF
8776 KATAKANA, // 1AFF0..1AFF3
8777 UNKNOWN, // 1AFF4
8778 KATAKANA, // 1AFF5..1AFFB
8779 UNKNOWN, // 1AFFC
8780 KATAKANA, // 1AFFD..1AFFE
8781 UNKNOWN, // 1AFFF
8782 KATAKANA, // 1B000
8783 HIRAGANA, // 1B001..1B11F
8784 KATAKANA, // 1B120..1B122
8785 UNKNOWN, // 1B123..1B131
8786 HIRAGANA, // 1B132
8787 UNKNOWN, // 1B133..1B14F
8788 HIRAGANA, // 1B150..1B152
8789 UNKNOWN, // 1B153..1B154
8790 KATAKANA, // 1B155
8791 UNKNOWN, // 1B156..1B163
8792 KATAKANA, // 1B164..1B167
8793 UNKNOWN, // 1B168..1B16F
8794 NUSHU, // 1B170..1B2FB
8795 UNKNOWN, // 1B2FC..1BBFF
8796 DUPLOYAN, // 1BC00..1BC6A
8797 UNKNOWN, // 1BC6B..1BC6F
8798 DUPLOYAN, // 1BC70..1BC7C
8799 UNKNOWN, // 1BC7D..1BC7F
8800 DUPLOYAN, // 1BC80..1BC88
8801 UNKNOWN, // 1BC89..1BC8F
8802 DUPLOYAN, // 1BC90..1BC99
8803 UNKNOWN, // 1BC9A..1BC9B
8804 DUPLOYAN, // 1BC9C..1BC9F
8805 COMMON, // 1BCA0..1BCA3
8806 UNKNOWN, // 1BCA4..1CBFF
8807 COMMON, // 1CC00..1CCFC
8808 UNKNOWN, // 1CCFD..1CCFF
8809 COMMON, // 1CD00..1CEB3
8810 UNKNOWN, // 1CEB4..1CEB9
8811 COMMON, // 1CEBA..1CED0
8812 UNKNOWN, // 1CED1..1CEDF
8813 COMMON, // 1CEE0..1CEF0
8814 UNKNOWN, // 1CEF1..1CEFF
8815 INHERITED, // 1CF00..1CF2D
8816 UNKNOWN, // 1CF2E..1CF2F
8817 INHERITED, // 1CF30..1CF46
8818 UNKNOWN, // 1CF47..1CF4F
8819 COMMON, // 1CF50..1CFC3
8820 UNKNOWN, // 1CFC4..1CFFF
8821 COMMON, // 1D000..1D0F5
8822 UNKNOWN, // 1D0F6..1D0FF
8823 COMMON, // 1D100..1D126
8824 UNKNOWN, // 1D127..1D128
8825 COMMON, // 1D129..1D166
8826 INHERITED, // 1D167..1D169
8827 COMMON, // 1D16A..1D17A
8828 INHERITED, // 1D17B..1D182
8829 COMMON, // 1D183..1D184
8830 INHERITED, // 1D185..1D18B
8831 COMMON, // 1D18C..1D1A9
8832 INHERITED, // 1D1AA..1D1AD
8833 COMMON, // 1D1AE..1D1EA
8834 UNKNOWN, // 1D1EB..1D1FF
8835 GREEK, // 1D200..1D245
8836 UNKNOWN, // 1D246..1D2BF
8837 COMMON, // 1D2C0..1D2D3
8838 UNKNOWN, // 1D2D4..1D2DF
8839 COMMON, // 1D2E0..1D2F3
8840 UNKNOWN, // 1D2F4..1D2FF
8841 COMMON, // 1D300..1D356
8842 UNKNOWN, // 1D357..1D35F
8843 COMMON, // 1D360..1D378
8844 UNKNOWN, // 1D379..1D3FF
8845 COMMON, // 1D400..1D454
8846 UNKNOWN, // 1D455
8847 COMMON, // 1D456..1D49C
8848 UNKNOWN, // 1D49D
8849 COMMON, // 1D49E..1D49F
8850 UNKNOWN, // 1D4A0..1D4A1
8851 COMMON, // 1D4A2
8852 UNKNOWN, // 1D4A3..1D4A4
8853 COMMON, // 1D4A5..1D4A6
8854 UNKNOWN, // 1D4A7..1D4A8
8855 COMMON, // 1D4A9..1D4AC
8856 UNKNOWN, // 1D4AD
8857 COMMON, // 1D4AE..1D4B9
8858 UNKNOWN, // 1D4BA
8859 COMMON, // 1D4BB
8860 UNKNOWN, // 1D4BC
8861 COMMON, // 1D4BD..1D4C3
8862 UNKNOWN, // 1D4C4
8863 COMMON, // 1D4C5..1D505
8864 UNKNOWN, // 1D506
8865 COMMON, // 1D507..1D50A
8866 UNKNOWN, // 1D50B..1D50C
8867 COMMON, // 1D50D..1D514
8868 UNKNOWN, // 1D515
8869 COMMON, // 1D516..1D51C
8870 UNKNOWN, // 1D51D
8871 COMMON, // 1D51E..1D539
8872 UNKNOWN, // 1D53A
8873 COMMON, // 1D53B..1D53E
8874 UNKNOWN, // 1D53F
8875 COMMON, // 1D540..1D544
8876 UNKNOWN, // 1D545
8877 COMMON, // 1D546
8878 UNKNOWN, // 1D547..1D549
8879 COMMON, // 1D54A..1D550
8880 UNKNOWN, // 1D551
8881 COMMON, // 1D552..1D6A5
8882 UNKNOWN, // 1D6A6..1D6A7
8883 COMMON, // 1D6A8..1D7CB
8884 UNKNOWN, // 1D7CC..1D7CD
8885 COMMON, // 1D7CE..1D7FF
8886 SIGNWRITING, // 1D800..1DA8B
8887 UNKNOWN, // 1DA8C..1DA9A
8888 SIGNWRITING, // 1DA9B..1DA9F
8889 UNKNOWN, // 1DAA0
8890 SIGNWRITING, // 1DAA1..1DAAF
8891 UNKNOWN, // 1DAB0..1DEFF
8892 LATIN, // 1DF00..1DF1E
8893 UNKNOWN, // 1DF1F..1DF24
8894 LATIN, // 1DF25..1DF2A
8895 UNKNOWN, // 1DF2B..1DFFF
8896 GLAGOLITIC, // 1E000..1E006
8897 UNKNOWN, // 1E007
8898 GLAGOLITIC, // 1E008..1E018
8899 UNKNOWN, // 1E019..1E01A
8900 GLAGOLITIC, // 1E01B..1E021
8901 UNKNOWN, // 1E022
8902 GLAGOLITIC, // 1E023..1E024
8903 UNKNOWN, // 1E025
8904 GLAGOLITIC, // 1E026..1E02A
8905 UNKNOWN, // 1E02B..1E02F
8906 CYRILLIC, // 1E030..1E06D
8907 UNKNOWN, // 1E06E..1E08E
8908 CYRILLIC, // 1E08F
8909 UNKNOWN, // 1E090..1E0FF
8910 NYIAKENG_PUACHUE_HMONG, // 1E100..1E12C
8911 UNKNOWN, // 1E12D..1E12F
8912 NYIAKENG_PUACHUE_HMONG, // 1E130..1E13D
8913 UNKNOWN, // 1E13E..1E13F
8914 NYIAKENG_PUACHUE_HMONG, // 1E140..1E149
8915 UNKNOWN, // 1E14A..1E14D
8916 NYIAKENG_PUACHUE_HMONG, // 1E14E..1E14F
8917 UNKNOWN, // 1E150..1E28F
8918 TOTO, // 1E290..1E2AE
8919 UNKNOWN, // 1E2AF..1E2BF
8920 WANCHO, // 1E2C0..1E2F9
8921 UNKNOWN, // 1E2FA..1E2FE
8922 WANCHO, // 1E2FF
8923 UNKNOWN, // 1E300..1E4CF
8924 NAG_MUNDARI, // 1E4D0..1E4F9
8925 UNKNOWN, // 1E4FA..1E5CF
8926 OL_ONAL, // 1E5D0..1E5FA
8927 UNKNOWN, // 1E5FB..1E5FE
8928 OL_ONAL, // 1E5FF
8929 UNKNOWN, // 1E600..1E6BF
8930 TAI_YO, // 1E6C0..1E6DE
8931 UNKNOWN, // 1E6DF
8932 TAI_YO, // 1E6E0..1E6F5
8933 UNKNOWN, // 1E6F6..1E6FD
8934 TAI_YO, // 1E6FE..1E6FF
8935 UNKNOWN, // 1E700..1E7DF
8936 ETHIOPIC, // 1E7E0..1E7E6
8937 UNKNOWN, // 1E7E7
8938 ETHIOPIC, // 1E7E8..1E7EB
8939 UNKNOWN, // 1E7EC
8940 ETHIOPIC, // 1E7ED..1E7EE
8941 UNKNOWN, // 1E7EF
8942 ETHIOPIC, // 1E7F0..1E7FE
8943 UNKNOWN, // 1E7FF
8944 MENDE_KIKAKUI, // 1E800..1E8C4
8945 UNKNOWN, // 1E8C5..1E8C6
8946 MENDE_KIKAKUI, // 1E8C7..1E8D6
8947 UNKNOWN, // 1E8D7..1E8FF
8948 ADLAM, // 1E900..1E94B
8949 UNKNOWN, // 1E94C..1E94F
8950 ADLAM, // 1E950..1E959
8951 UNKNOWN, // 1E95A..1E95D
8952 ADLAM, // 1E95E..1E95F
8953 UNKNOWN, // 1E960..1EC70
8954 COMMON, // 1EC71..1ECB4
8955 UNKNOWN, // 1ECB5..1ED00
8956 COMMON, // 1ED01..1ED3D
8957 UNKNOWN, // 1ED3E..1EDFF
8958 ARABIC, // 1EE00..1EE03
8959 UNKNOWN, // 1EE04
8960 ARABIC, // 1EE05..1EE1F
8961 UNKNOWN, // 1EE20
8962 ARABIC, // 1EE21..1EE22
8963 UNKNOWN, // 1EE23
8964 ARABIC, // 1EE24
8965 UNKNOWN, // 1EE25..1EE26
8966 ARABIC, // 1EE27
8967 UNKNOWN, // 1EE28
8968 ARABIC, // 1EE29..1EE32
8969 UNKNOWN, // 1EE33
8970 ARABIC, // 1EE34..1EE37
8971 UNKNOWN, // 1EE38
8972 ARABIC, // 1EE39
8973 UNKNOWN, // 1EE3A
8974 ARABIC, // 1EE3B
8975 UNKNOWN, // 1EE3C..1EE41
8976 ARABIC, // 1EE42
8977 UNKNOWN, // 1EE43..1EE46
8978 ARABIC, // 1EE47
8979 UNKNOWN, // 1EE48
8980 ARABIC, // 1EE49
8981 UNKNOWN, // 1EE4A
8982 ARABIC, // 1EE4B
8983 UNKNOWN, // 1EE4C
8984 ARABIC, // 1EE4D..1EE4F
8985 UNKNOWN, // 1EE50
8986 ARABIC, // 1EE51..1EE52
8987 UNKNOWN, // 1EE53
8988 ARABIC, // 1EE54
8989 UNKNOWN, // 1EE55..1EE56
8990 ARABIC, // 1EE57
8991 UNKNOWN, // 1EE58
8992 ARABIC, // 1EE59
8993 UNKNOWN, // 1EE5A
8994 ARABIC, // 1EE5B
8995 UNKNOWN, // 1EE5C
8996 ARABIC, // 1EE5D
8997 UNKNOWN, // 1EE5E
8998 ARABIC, // 1EE5F
8999 UNKNOWN, // 1EE60
9000 ARABIC, // 1EE61..1EE62
9001 UNKNOWN, // 1EE63
9002 ARABIC, // 1EE64
9003 UNKNOWN, // 1EE65..1EE66
9004 ARABIC, // 1EE67..1EE6A
9005 UNKNOWN, // 1EE6B
9006 ARABIC, // 1EE6C..1EE72
9007 UNKNOWN, // 1EE73
9008 ARABIC, // 1EE74..1EE77
9009 UNKNOWN, // 1EE78
9010 ARABIC, // 1EE79..1EE7C
9011 UNKNOWN, // 1EE7D
9012 ARABIC, // 1EE7E
9013 UNKNOWN, // 1EE7F
9014 ARABIC, // 1EE80..1EE89
9015 UNKNOWN, // 1EE8A
9016 ARABIC, // 1EE8B..1EE9B
9017 UNKNOWN, // 1EE9C..1EEA0
9018 ARABIC, // 1EEA1..1EEA3
9019 UNKNOWN, // 1EEA4
9020 ARABIC, // 1EEA5..1EEA9
9021 UNKNOWN, // 1EEAA
9022 ARABIC, // 1EEAB..1EEBB
9023 UNKNOWN, // 1EEBC..1EEEF
9024 ARABIC, // 1EEF0..1EEF1
9025 UNKNOWN, // 1EEF2..1EFFF
9026 COMMON, // 1F000..1F02B
9027 UNKNOWN, // 1F02C..1F02F
9028 COMMON, // 1F030..1F093
9029 UNKNOWN, // 1F094..1F09F
9030 COMMON, // 1F0A0..1F0AE
9031 UNKNOWN, // 1F0AF..1F0B0
9032 COMMON, // 1F0B1..1F0BF
9033 UNKNOWN, // 1F0C0
9034 COMMON, // 1F0C1..1F0CF
9035 UNKNOWN, // 1F0D0
9036 COMMON, // 1F0D1..1F0F5
9037 UNKNOWN, // 1F0F6..1F0FF
9038 COMMON, // 1F100..1F1AD
9039 UNKNOWN, // 1F1AE..1F1E5
9040 COMMON, // 1F1E6..1F1FF
9041 HIRAGANA, // 1F200
9042 COMMON, // 1F201..1F202
9043 UNKNOWN, // 1F203..1F20F
9044 COMMON, // 1F210..1F23B
9045 UNKNOWN, // 1F23C..1F23F
9046 COMMON, // 1F240..1F248
9047 UNKNOWN, // 1F249..1F24F
9048 COMMON, // 1F250..1F251
9049 UNKNOWN, // 1F252..1F25F
9050 COMMON, // 1F260..1F265
9051 UNKNOWN, // 1F266..1F2FF
9052 COMMON, // 1F300..1F6D8
9053 UNKNOWN, // 1F6D9..1F6DB
9054 COMMON, // 1F6DC..1F6EC
9055 UNKNOWN, // 1F6ED..1F6EF
9056 COMMON, // 1F6F0..1F6FC
9057 UNKNOWN, // 1F6FD..1F6FF
9058 COMMON, // 1F700..1F7D9
9059 UNKNOWN, // 1F7DA..1F7DF
9060 COMMON, // 1F7E0..1F7EB
9061 UNKNOWN, // 1F7EC..1F7EF
9062 COMMON, // 1F7F0
9063 UNKNOWN, // 1F7F1..1F7FF
9064 COMMON, // 1F800..1F80B
9065 UNKNOWN, // 1F80C..1F80F
9066 COMMON, // 1F810..1F847
9067 UNKNOWN, // 1F848..1F84F
9068 COMMON, // 1F850..1F859
9069 UNKNOWN, // 1F85A..1F85F
9070 COMMON, // 1F860..1F887
9071 UNKNOWN, // 1F888..1F88F
9072 COMMON, // 1F890..1F8AD
9073 UNKNOWN, // 1F8AE..1F8AF
9074 COMMON, // 1F8B0..1F8BB
9075 UNKNOWN, // 1F8BC..1F8BF
9076 COMMON, // 1F8C0..1F8C1
9077 UNKNOWN, // 1F8C2..1F8CF
9078 COMMON, // 1F8D0..1F8D8
9079 UNKNOWN, // 1F8D9..1F8FF
9080 COMMON, // 1F900..1FA57
9081 UNKNOWN, // 1FA58..1FA5F
9082 COMMON, // 1FA60..1FA6D
9083 UNKNOWN, // 1FA6E..1FA6F
9084 COMMON, // 1FA70..1FA7C
9085 UNKNOWN, // 1FA7D..1FA7F
9086 COMMON, // 1FA80..1FA8A
9087 UNKNOWN, // 1FA8B..1FA8D
9088 COMMON, // 1FA8E..1FAC6
9089 UNKNOWN, // 1FAC7
9090 COMMON, // 1FAC8
9091 UNKNOWN, // 1FAC9..1FACC
9092 COMMON, // 1FACD..1FADC
9093 UNKNOWN, // 1FADD..1FADE
9094 COMMON, // 1FADF..1FAEA
9095 UNKNOWN, // 1FAEB..1FAEE
9096 COMMON, // 1FAEF..1FAF8
9097 UNKNOWN, // 1FAF9..1FAFF
9098 COMMON, // 1FB00..1FB92
9099 UNKNOWN, // 1FB93
9100 COMMON, // 1FB94..1FBFA
9101 UNKNOWN, // 1FBFB..1FFFF
9102 HAN, // 20000..2A6DF
9103 UNKNOWN, // 2A6E0..2A6FF
9104 HAN, // 2A700..2B81D
9105 UNKNOWN, // 2B81E..2B81F
9106 HAN, // 2B820..2CEAD
9107 UNKNOWN, // 2CEAE..2CEAF
9108 HAN, // 2CEB0..2EBE0
9109 UNKNOWN, // 2EBE1..2EBEF
9110 HAN, // 2EBF0..2EE5D
9111 UNKNOWN, // 2EE5E..2F7FF
9112 HAN, // 2F800..2FA1D
9113 UNKNOWN, // 2FA1E..2FFFF
9114 HAN, // 30000..3134A
9115 UNKNOWN, // 3134B..3134F
9116 HAN, // 31350..33479
9117 UNKNOWN, // 3347A..E0000
9118 COMMON, // E0001
9119 UNKNOWN, // E0002..E001F
9120 COMMON, // E0020..E007F
9121 UNKNOWN, // E0080..E00FF
9122 INHERITED, // E0100..E01EF
9123 UNKNOWN, // E01F0..10FFFF
9124 };
9125
9126 private static final HashMap<String, Character.UnicodeScript> aliases;
9127 static {
9128 aliases = HashMap.newHashMap(UNKNOWN.ordinal() + 1);
9129 aliases.put("ADLM", ADLAM);
9130 aliases.put("AGHB", CAUCASIAN_ALBANIAN);
9131 aliases.put("AHOM", AHOM);
9132 aliases.put("ARAB", ARABIC);
9133 aliases.put("ARMI", IMPERIAL_ARAMAIC);
9134 aliases.put("ARMN", ARMENIAN);
9135 aliases.put("AVST", AVESTAN);
9136 aliases.put("BALI", BALINESE);
9137 aliases.put("BAMU", BAMUM);
9138 aliases.put("BASS", BASSA_VAH);
9139 aliases.put("BATK", BATAK);
9140 aliases.put("BENG", BENGALI);
9141 aliases.put("BERF", BERIA_ERFE);
9142 aliases.put("BHKS", BHAIKSUKI);
9143 aliases.put("BOPO", BOPOMOFO);
9144 aliases.put("BRAH", BRAHMI);
9145 aliases.put("BRAI", BRAILLE);
9146 aliases.put("BUGI", BUGINESE);
9147 aliases.put("BUHD", BUHID);
9148 aliases.put("CAKM", CHAKMA);
9149 aliases.put("CANS", CANADIAN_ABORIGINAL);
9150 aliases.put("CARI", CARIAN);
9151 aliases.put("CHAM", CHAM);
9152 aliases.put("CHER", CHEROKEE);
9153 aliases.put("CHRS", CHORASMIAN);
9154 aliases.put("COPT", COPTIC);
9155 aliases.put("CPMN", CYPRO_MINOAN);
9156 aliases.put("CPRT", CYPRIOT);
9157 aliases.put("CYRL", CYRILLIC);
9158 aliases.put("DEVA", DEVANAGARI);
9159 aliases.put("DIAK", DIVES_AKURU);
9160 aliases.put("DOGR", DOGRA);
9161 aliases.put("DSRT", DESERET);
9162 aliases.put("DUPL", DUPLOYAN);
9163 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
9164 aliases.put("ELBA", ELBASAN);
9165 aliases.put("ELYM", ELYMAIC);
9166 aliases.put("ETHI", ETHIOPIC);
9167 aliases.put("GARA", GARAY);
9168 aliases.put("GEOR", GEORGIAN);
9169 aliases.put("GLAG", GLAGOLITIC);
9170 aliases.put("GONG", GUNJALA_GONDI);
9171 aliases.put("GONM", MASARAM_GONDI);
9172 aliases.put("GOTH", GOTHIC);
9173 aliases.put("GRAN", GRANTHA);
9174 aliases.put("GREK", GREEK);
9175 aliases.put("GUJR", GUJARATI);
9176 aliases.put("GUKH", GURUNG_KHEMA);
9177 aliases.put("GURU", GURMUKHI);
9178 aliases.put("HANG", HANGUL);
9179 aliases.put("HANI", HAN);
9180 aliases.put("HANO", HANUNOO);
9181 aliases.put("HATR", HATRAN);
9182 aliases.put("HEBR", HEBREW);
9183 aliases.put("HIRA", HIRAGANA);
9184 aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS);
9185 aliases.put("HMNG", PAHAWH_HMONG);
9186 aliases.put("HMNP", NYIAKENG_PUACHUE_HMONG);
9187 aliases.put("HUNG", OLD_HUNGARIAN);
9188 aliases.put("ITAL", OLD_ITALIC);
9189 aliases.put("JAVA", JAVANESE);
9190 aliases.put("KALI", KAYAH_LI);
9191 aliases.put("KANA", KATAKANA);
9192 aliases.put("KAWI", KAWI);
9193 aliases.put("KHAR", KHAROSHTHI);
9194 aliases.put("KHMR", KHMER);
9195 aliases.put("KHOJ", KHOJKI);
9196 aliases.put("KITS", KHITAN_SMALL_SCRIPT);
9197 aliases.put("KNDA", KANNADA);
9198 aliases.put("KRAI", KIRAT_RAI);
9199 aliases.put("KTHI", KAITHI);
9200 aliases.put("LANA", TAI_THAM);
9201 aliases.put("LAOO", LAO);
9202 aliases.put("LATN", LATIN);
9203 aliases.put("LEPC", LEPCHA);
9204 aliases.put("LIMB", LIMBU);
9205 aliases.put("LINA", LINEAR_A);
9206 aliases.put("LINB", LINEAR_B);
9207 aliases.put("LISU", LISU);
9208 aliases.put("LYCI", LYCIAN);
9209 aliases.put("LYDI", LYDIAN);
9210 aliases.put("MAHJ", MAHAJANI);
9211 aliases.put("MAKA", MAKASAR);
9212 aliases.put("MAND", MANDAIC);
9213 aliases.put("MANI", MANICHAEAN);
9214 aliases.put("MARC", MARCHEN);
9215 aliases.put("MEDF", MEDEFAIDRIN);
9216 aliases.put("MEND", MENDE_KIKAKUI);
9217 aliases.put("MERC", MEROITIC_CURSIVE);
9218 aliases.put("MERO", MEROITIC_HIEROGLYPHS);
9219 aliases.put("MLYM", MALAYALAM);
9220 aliases.put("MODI", MODI);
9221 aliases.put("MONG", MONGOLIAN);
9222 aliases.put("MROO", MRO);
9223 aliases.put("MTEI", MEETEI_MAYEK);
9224 aliases.put("MULT", MULTANI);
9225 aliases.put("MYMR", MYANMAR);
9226 aliases.put("NAGM", NAG_MUNDARI);
9227 aliases.put("NAND", NANDINAGARI);
9228 aliases.put("NARB", OLD_NORTH_ARABIAN);
9229 aliases.put("NBAT", NABATAEAN);
9230 aliases.put("NEWA", NEWA);
9231 aliases.put("NKOO", NKO);
9232 aliases.put("NSHU", NUSHU);
9233 aliases.put("OGAM", OGHAM);
9234 aliases.put("OLCK", OL_CHIKI);
9235 aliases.put("ONAO", OL_ONAL);
9236 aliases.put("ORKH", OLD_TURKIC);
9237 aliases.put("ORYA", ORIYA);
9238 aliases.put("OSGE", OSAGE);
9239 aliases.put("OSMA", OSMANYA);
9240 aliases.put("OUGR", OLD_UYGHUR);
9241 aliases.put("PALM", PALMYRENE);
9242 aliases.put("PAUC", PAU_CIN_HAU);
9243 aliases.put("PERM", OLD_PERMIC);
9244 aliases.put("PHAG", PHAGS_PA);
9245 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
9246 aliases.put("PHLP", PSALTER_PAHLAVI);
9247 aliases.put("PHNX", PHOENICIAN);
9248 aliases.put("PLRD", MIAO);
9249 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
9250 aliases.put("RJNG", REJANG);
9251 aliases.put("ROHG", HANIFI_ROHINGYA);
9252 aliases.put("RUNR", RUNIC);
9253 aliases.put("SAMR", SAMARITAN);
9254 aliases.put("SARB", OLD_SOUTH_ARABIAN);
9255 aliases.put("SAUR", SAURASHTRA);
9256 aliases.put("SGNW", SIGNWRITING);
9257 aliases.put("SHAW", SHAVIAN);
9258 aliases.put("SHRD", SHARADA);
9259 aliases.put("SIDD", SIDDHAM);
9260 aliases.put("SIDT", SIDETIC);
9261 aliases.put("SIND", KHUDAWADI);
9262 aliases.put("SINH", SINHALA);
9263 aliases.put("SOGD", SOGDIAN);
9264 aliases.put("SOGO", OLD_SOGDIAN);
9265 aliases.put("SORA", SORA_SOMPENG);
9266 aliases.put("SOYO", SOYOMBO);
9267 aliases.put("SUND", SUNDANESE);
9268 aliases.put("SUNU", SUNUWAR);
9269 aliases.put("SYLO", SYLOTI_NAGRI);
9270 aliases.put("SYRC", SYRIAC);
9271 aliases.put("TAGB", TAGBANWA);
9272 aliases.put("TAKR", TAKRI);
9273 aliases.put("TALE", TAI_LE);
9274 aliases.put("TALU", NEW_TAI_LUE);
9275 aliases.put("TAML", TAMIL);
9276 aliases.put("TANG", TANGUT);
9277 aliases.put("TAVT", TAI_VIET);
9278 aliases.put("TAYO", TAI_YO);
9279 aliases.put("TELU", TELUGU);
9280 aliases.put("TFNG", TIFINAGH);
9281 aliases.put("TGLG", TAGALOG);
9282 aliases.put("THAA", THAANA);
9283 aliases.put("THAI", THAI);
9284 aliases.put("TIBT", TIBETAN);
9285 aliases.put("TIRH", TIRHUTA);
9286 aliases.put("TNSA", TANGSA);
9287 aliases.put("TODR", TODHRI);
9288 aliases.put("TOLS", TOLONG_SIKI);
9289 aliases.put("TOTO", TOTO);
9290 aliases.put("TUTG", TULU_TIGALARI);
9291 aliases.put("UGAR", UGARITIC);
9292 aliases.put("VAII", VAI);
9293 aliases.put("VITH", VITHKUQI);
9294 aliases.put("WARA", WARANG_CITI);
9295 aliases.put("WCHO", WANCHO);
9296 aliases.put("XPEO", OLD_PERSIAN);
9297 aliases.put("XSUX", CUNEIFORM);
9298 aliases.put("YEZI", YEZIDI);
9299 aliases.put("YIII", YI);
9300 aliases.put("ZANB", ZANABAZAR_SQUARE);
9301 aliases.put("ZINH", INHERITED);
9302 aliases.put("ZYYY", COMMON);
9303 aliases.put("ZZZZ", UNKNOWN);
9304 }
9305
9306 /**
9307 * Returns the enum constant representing the Unicode script of which
9308 * the given character (Unicode code point) is assigned to.
9309 *
9310 * @param codePoint the character (Unicode code point) in question.
9311 * @return The {@code UnicodeScript} constant representing the
9312 * Unicode script of which this character is assigned to.
9313 *
9314 * @throws IllegalArgumentException if the specified
9315 * {@code codePoint} is an invalid Unicode code point.
9316 * @see Character#isValidCodePoint(int)
9317 *
9318 */
9319 public static UnicodeScript of(int codePoint) {
9320 if (!isValidCodePoint(codePoint))
9321 throw new IllegalArgumentException(
9322 String.format("Not a valid Unicode code point: 0x%X", codePoint));
9323 int type = getType(codePoint);
9324 // leave SURROGATE and PRIVATE_USE for table lookup
9325 if (type == UNASSIGNED)
9326 return UNKNOWN;
9327 int index = Arrays.binarySearch(scriptStarts, codePoint);
9328 if (index < 0)
9329 index = -index - 2;
9330 return scripts[index];
9331 }
9332
9333 /**
9334 * Returns the UnicodeScript constant with the given Unicode script
9335 * name or the script name alias. Script names and their aliases are
9336 * determined by The Unicode Standard. The files {@code Scripts.txt}
9337 * and {@code PropertyValueAliases.txt} define script names
9338 * and the script name aliases for a particular version of the
9339 * standard. The {@link Character} class specifies the version of
9340 * the standard that it supports.
9341 * <p>
9342 * Character case is ignored for all of the valid script names.
9343 * The en_US locale's case mapping rules are used to provide
9344 * case-insensitive string comparisons for script name validation.
9345 *
9346 * @param scriptName A {@code UnicodeScript} name.
9347 * @return The {@code UnicodeScript} constant identified
9348 * by {@code scriptName}
9349 * @throws IllegalArgumentException if {@code scriptName} is an
9350 * invalid name
9351 * @throws NullPointerException if {@code scriptName} is null
9352 */
9353 public static final UnicodeScript forName(String scriptName) {
9354 scriptName = scriptName.toUpperCase(Locale.ENGLISH);
9355 //.replace(' ', '_'));
9356 UnicodeScript sc = aliases.get(scriptName);
9357 if (sc != null)
9358 return sc;
9359 return valueOf(scriptName);
9360 }
9361 }
9362
9363 /**
9364 * The value of the {@code Character}.
9365 *
9366 * @serial
9367 */
9368 private final char value;
9369
9370 /** use serialVersionUID from JDK 1.0.2 for interoperability */
9371 @java.io.Serial
9372 private static final long serialVersionUID = 3786198910865385080L;
9373
/**
 * Creates a newly allocated {@code Character} object wrapping the
 * specified {@code char} value.
 *
 * @param  value   the {@code char} the new {@code Character}
 *                 object represents.
 *
 * @deprecated
 * It is rarely appropriate to use this constructor. The static factory
 * {@link #valueOf(char)} is generally a better choice, as it is
 * likely to yield significantly better space and time performance.
 */
@Deprecated(since="9")
public Character(char value) {
    this.value = value;
}
9390
9391 private static final class CharacterCache {
9392 private CharacterCache(){}
9393
9394 @Stable
9395 static final Character[] cache;
9396 static Character[] archivedCache;
9397
9398 static {
9399 int size = 127 + 1;
9400
9401 // Load and use the archived cache if it exists
9402 CDS.initializeFromArchive(CharacterCache.class);
9403 if (archivedCache == null) {
9404 Character[] c = new Character[size];
9405 for (int i = 0; i < size; i++) {
9406 c[i] = new Character((char) i);
9407 }
9408 archivedCache = c;
9409 }
9410 cache = archivedCache;
9411 assert cache.length == size;
9412 }
9413 }
9414
9415 /**
9416 * Returns a {@code Character} instance representing the specified
9417 * {@code char} value.
9418 * <div class="preview-block">
9419 * <div class="preview-comment">
9420 * <p>
9421 * - When preview features are NOT enabled, {@code Character} is an identity class.
9422 * If a new {@code Character} instance is not required, this method
9423 * should generally be used in preference to the constructor
9424 * {@link #Character(char)}, as this method is likely to yield
9425 * significantly better space and time performance by caching
9426 * frequently requested values.
9427 * This method will always cache values in the range {@code
9428 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
9429 * cache other values outside of this range.
9430 * </p>
9431 * <p>
9432 * - When preview features are enabled, {@code Character} is a {@linkplain Class#isValue value class}.
9433 * The {@code valueOf} behavior is the same as invoking the constructor,
9434 * whether cached or not.
9435 * </p>
9436 * </div>
9437 * </div>
9438 *
9439 * @param c a char value.
9440 * @return a {@code Character} instance representing {@code c}.
9441 * @since 1.5
9442 */
9443 @IntrinsicCandidate
9444 @DeserializeConstructor
9445 public static Character valueOf(char c) {
9446 if (c <= 127) { // must cache
9447 return CharacterCache.cache[(int)c];
9448 }
9449 return new Character(c);
9450 }
9451
9452 /**
9453 * Returns the value of this {@code Character} object.
9454 * @return the primitive {@code char} value represented by
9455 * this object.
9456 */
9457 @IntrinsicCandidate
9458 public char charValue() {
9459 return value;
9460 }
9461
9462 /**
9463 * Returns a hash code for this {@code Character}; equal to the result
9464 * of invoking {@code charValue()}.
9465 *
9466 * @return a hash code value for this {@code Character}
9467 */
9468 @Override
9469 public int hashCode() {
9470 return Character.hashCode(value);
9471 }
9472
9473 /**
9474 * Returns a hash code for a {@code char} value; compatible with
9475 * {@code Character.hashCode()}.
9476 *
9477 * @since 1.8
9478 *
9479 * @param value The {@code char} for which to return a hash code.
9480 * @return a hash code value for a {@code char} value.
9481 */
9482 public static int hashCode(char value) {
9483 return (int)value;
9484 }
9485
9486 /**
9487 * Compares this object against the specified object.
9488 * The result is {@code true} if and only if the argument is not
9489 * {@code null} and is a {@code Character} object that
9490 * represents the same {@code char} value as this object.
9491 *
9492 * @param obj the object to compare with.
9493 * @return {@code true} if the objects are the same;
9494 * {@code false} otherwise.
9495 */
9496 public boolean equals(Object obj) {
9497 if (obj instanceof Character c) {
9498 return value == c.charValue();
9499 }
9500 return false;
9501 }
9502
9503 /**
9504 * Returns a {@code String} object representing this
9505 * {@code Character}'s value. The result is a string of
9506 * length 1 whose sole component is the primitive
9507 * {@code char} value represented by this
9508 * {@code Character} object.
9509 *
9510 * @return a string representation of this object.
9511 */
9512 @Override
9513 public String toString() {
9514 return String.valueOf(value);
9515 }
9516
9517 /**
9518 * Returns a {@code String} object representing the
9519 * specified {@code char}. The result is a string of length
9520 * 1 consisting solely of the specified {@code char}.
9521 *
9522 * @apiNote This method cannot handle <a
9523 * href="#supplementary"> supplementary characters</a>. To support
9524 * all Unicode characters, including supplementary characters, use
9525 * the {@link #toString(int)} method.
9526 *
9527 * @param c the {@code char} to be converted
9528 * @return the string representation of the specified {@code char}
9529 * @since 1.4
9530 */
9531 public static String toString(char c) {
9532 return String.valueOf(c);
9533 }
9534
9535 /**
9536 * Returns a {@code String} object representing the
9537 * specified character (Unicode code point). The result is a string of
9538 * length 1 or 2, consisting solely of the specified {@code codePoint}.
9539 *
9540 * @param codePoint the {@code codePoint} to be converted
9541 * @return the string representation of the specified {@code codePoint}
9542 * @throws IllegalArgumentException if the specified
9543 * {@code codePoint} is not a {@linkplain #isValidCodePoint
9544 * valid Unicode code point}.
9545 * @since 11
9546 */
9547 public static String toString(int codePoint) {
9548 return String.valueOfCodePoint(codePoint);
9549 }
9550
9551 /**
9552 * Determines whether the specified code point is a valid
9553 * <a href="http://www.unicode.org/glossary/#code_point">
9554 * Unicode code point value</a>.
9555 *
9556 * @param codePoint the Unicode code point to be tested
9557 * @return {@code true} if the specified code point value is between
9558 * {@link #MIN_CODE_POINT} and
9559 * {@link #MAX_CODE_POINT} inclusive;
9560 * {@code false} otherwise.
9561 * @since 1.5
9562 */
9563 public static boolean isValidCodePoint(int codePoint) {
9564 // Optimized form of:
9565 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
9566 int plane = codePoint >>> 16;
9567 return plane < ((MAX_CODE_POINT + 1) >>> 16);
9568 }
9569
9570 /**
9571 * Determines whether the specified character (Unicode code point)
9572 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
9573 * Such code points can be represented using a single {@code char}.
9574 *
9575 * @param codePoint the character (Unicode code point) to be tested
9576 * @return {@code true} if the specified code point is between
9577 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
9578 * {@code false} otherwise.
9579 * @since 1.7
9580 */
9581 public static boolean isBmpCodePoint(int codePoint) {
9582 return codePoint >>> 16 == 0;
9583 // Optimized form of:
9584 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
9585 // We consistently use logical shift (>>>) to facilitate
9586 // additional runtime optimizations.
9587 }
9588
9589 /**
9590 * Determines whether the specified character (Unicode code point)
9591 * is in the <a href="#supplementary">supplementary character</a> range.
9592 *
9593 * @param codePoint the character (Unicode code point) to be tested
9594 * @return {@code true} if the specified code point is between
9595 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
9596 * {@link #MAX_CODE_POINT} inclusive;
9597 * {@code false} otherwise.
9598 * @since 1.5
9599 */
9600 public static boolean isSupplementaryCodePoint(int codePoint) {
9601 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
9602 && codePoint < MAX_CODE_POINT + 1;
9603 }
9604
9605 /**
9606 * Determines if the given {@code char} value is a
9607 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
9608 * Unicode high-surrogate code unit</a>
9609 * (also known as <i>leading-surrogate code unit</i>).
9610 *
9611 * <p>Such values do not represent characters by themselves,
9612 * but are used in the representation of
9613 * <a href="#supplementary">supplementary characters</a>
9614 * in the UTF-16 encoding.
9615 *
9616 * @param ch the {@code char} value to be tested.
9617 * @return {@code true} if the {@code char} value is between
9618 * {@link #MIN_HIGH_SURROGATE} and
9619 * {@link #MAX_HIGH_SURROGATE} inclusive;
9620 * {@code false} otherwise.
9621 * @see Character#isLowSurrogate(char)
9622 * @see Character.UnicodeBlock#of(int)
9623 * @since 1.5
9624 */
9625 public static boolean isHighSurrogate(char ch) {
9626 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
9627 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
9628 }
9629
9630 /**
9631 * Determines if the given {@code char} value is a
9632 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
9633 * Unicode low-surrogate code unit</a>
9634 * (also known as <i>trailing-surrogate code unit</i>).
9635 *
9636 * <p>Such values do not represent characters by themselves,
9637 * but are used in the representation of
9638 * <a href="#supplementary">supplementary characters</a>
9639 * in the UTF-16 encoding.
9640 *
9641 * @param ch the {@code char} value to be tested.
9642 * @return {@code true} if the {@code char} value is between
9643 * {@link #MIN_LOW_SURROGATE} and
9644 * {@link #MAX_LOW_SURROGATE} inclusive;
9645 * {@code false} otherwise.
9646 * @see Character#isHighSurrogate(char)
9647 * @since 1.5
9648 */
9649 public static boolean isLowSurrogate(char ch) {
9650 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
9651 }
9652
9653 /**
9654 * Determines if the given {@code char} value is a Unicode
9655 * <i>surrogate code unit</i>.
9656 *
9657 * <p>Such values do not represent characters by themselves,
9658 * but are used in the representation of
9659 * <a href="#supplementary">supplementary characters</a>
9660 * in the UTF-16 encoding.
9661 *
9662 * <p>A char value is a surrogate code unit if and only if it is either
9663 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
9664 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
9665 *
9666 * @param ch the {@code char} value to be tested.
9667 * @return {@code true} if the {@code char} value is between
9668 * {@link #MIN_SURROGATE} and
9669 * {@link #MAX_SURROGATE} inclusive;
9670 * {@code false} otherwise.
9671 * @since 1.7
9672 */
9673 public static boolean isSurrogate(char ch) {
9674 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
9675 }
9676
9677 /**
9678 * Determines whether the specified pair of {@code char}
9679 * values is a valid
9680 * <a href="http://www.unicode.org/glossary/#surrogate_pair">
9681 * Unicode surrogate pair</a>.
9682 *
9683 * <p>This method is equivalent to the expression:
9684 * <blockquote><pre>{@code
9685 * isHighSurrogate(high) && isLowSurrogate(low)
9686 * }</pre></blockquote>
9687 *
9688 * @param high the high-surrogate code value to be tested
9689 * @param low the low-surrogate code value to be tested
9690 * @return {@code true} if the specified high and
9691 * low-surrogate code values represent a valid surrogate pair;
9692 * {@code false} otherwise.
9693 * @since 1.5
9694 */
9695 public static boolean isSurrogatePair(char high, char low) {
9696 return isHighSurrogate(high) && isLowSurrogate(low);
9697 }
9698
9699 /**
9700 * Determines the number of {@code char} values needed to
9701 * represent the specified character (Unicode code point). If the
9702 * specified character is equal to or greater than 0x10000, then
9703 * the method returns 2. Otherwise, the method returns 1.
9704 *
9705 * <p>This method doesn't validate the specified character to be a
9706 * valid Unicode code point. The caller must validate the
9707 * character value using {@link #isValidCodePoint(int) isValidCodePoint}
9708 * if necessary.
9709 *
9710 * @param codePoint the character (Unicode code point) to be tested.
9711 * @return 2 if the character is equal to or greater than 0x10000 (no validity check is performed); 1 otherwise.
9712 * @see Character#isSupplementaryCodePoint(int)
9713 * @since 1.5
9714 */
9715 public static int charCount(int codePoint) {
9716 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
9717 }
9718
9719 /**
9720 * Converts the specified surrogate pair to its supplementary code
9721 * point value. This method does not validate the specified
9722 * surrogate pair. The caller must validate it using {@link
9723 * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
9724 *
9725 * @param high the high-surrogate code unit
9726 * @param low the low-surrogate code unit
9727 * @return the supplementary code point composed from the
9728 * specified surrogate pair.
9729 * @since 1.5
9730 */
9731 public static int toCodePoint(char high, char low) {
9732 // Optimized form of:
9733 // return ((high - MIN_HIGH_SURROGATE) << 10)
9734 // + (low - MIN_LOW_SURROGATE)
9735 // + MIN_SUPPLEMENTARY_CODE_POINT;
9736 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
9737 - (MIN_HIGH_SURROGATE << 10)
9738 - MIN_LOW_SURROGATE);
9739 }
9740
9741 /**
9742 * Returns the code point at the given index of the
9743 * {@code CharSequence}. If the {@code char} value at
9744 * the given index in the {@code CharSequence} is in the
9745 * high-surrogate range, the following index is less than the
9746 * length of the {@code CharSequence}, and the
9747 * {@code char} value at the following index is in the
9748 * low-surrogate range, then the supplementary code point
9749 * corresponding to this surrogate pair is returned. Otherwise,
9750 * the {@code char} value at the given index is returned.
9751 *
9752 * @param seq a sequence of {@code char} values (Unicode code
9753 * units)
9754 * @param index the index to the {@code char} values (Unicode
9755 * code units) in {@code seq} to be converted
9756 * @return the Unicode code point at the given index
9757 * @throws NullPointerException if {@code seq} is null.
9758 * @throws IndexOutOfBoundsException if the value
9759 * {@code index} is negative or not less than
9760 * {@link CharSequence#length() seq.length()}.
9761 * @since 1.5
9762 */
9763 public static int codePointAt(CharSequence seq, int index) {
9764 char c1 = seq.charAt(index);
9765 if (isHighSurrogate(c1) && ++index < seq.length()) {
9766 char c2 = seq.charAt(index);
9767 if (isLowSurrogate(c2)) {
9768 return toCodePoint(c1, c2);
9769 }
9770 }
9771 return c1;
9772 }
9773
9774 /**
9775 * Returns the code point at the given index of the
9776 * {@code char} array. If the {@code char} value at
9777 * the given index in the {@code char} array is in the
9778 * high-surrogate range, the following index is less than the
9779 * length of the {@code char} array, and the
9780 * {@code char} value at the following index is in the
9781 * low-surrogate range, then the supplementary code point
9782 * corresponding to this surrogate pair is returned. Otherwise,
9783 * the {@code char} value at the given index is returned.
9784 *
9785 * @param a the {@code char} array
9786 * @param index the index to the {@code char} values (Unicode
9787 * code units) in the {@code char} array to be converted
9788 * @return the Unicode code point at the given index
9789 * @throws NullPointerException if {@code a} is null.
9790 * @throws IndexOutOfBoundsException if the value
9791 * {@code index} is negative or not less than
9792 * the length of the {@code char} array.
9793 * @since 1.5
9794 */
9795 public static int codePointAt(char[] a, int index) {
9796 return codePointAtImpl(a, index, a.length);
9797 }
9798
9799 /**
9800 * Returns the code point at the given index of the
9801 * {@code char} array, where only array elements with
9802 * {@code index} less than {@code limit} can be used. If
9803 * the {@code char} value at the given index in the
9804 * {@code char} array is in the high-surrogate range, the
9805 * following index is less than the {@code limit}, and the
9806 * {@code char} value at the following index is in the
9807 * low-surrogate range, then the supplementary code point
9808 * corresponding to this surrogate pair is returned. Otherwise,
9809 * the {@code char} value at the given index is returned.
9810 *
9811 * @param a the {@code char} array
9812 * @param index the index to the {@code char} values (Unicode
9813 * code units) in the {@code char} array to be converted
9814 * @param limit the index after the last array element that
9815 * can be used in the {@code char} array
9816 * @return the Unicode code point at the given index
9817 * @throws NullPointerException if {@code a} is null.
9818 * @throws IndexOutOfBoundsException if the {@code index}
9819 * argument is negative or not less than the {@code limit}
9820 * argument, or if the {@code limit} argument is negative or
9821 * greater than the length of the {@code char} array.
9822 * @since 1.5
9823 */
9824 public static int codePointAt(char[] a, int index, int limit) {
9825 if (index >= limit || index < 0 || limit > a.length) {
9826 throw new IndexOutOfBoundsException();
9827 }
9828 return codePointAtImpl(a, index, limit);
9829 }
9830
9831 // throws ArrayIndexOutOfBoundsException if index out of bounds
9832 static int codePointAtImpl(char[] a, int index, int limit) {
9833 char c1 = a[index];
9834 if (isHighSurrogate(c1) && ++index < limit) {
9835 char c2 = a[index];
9836 if (isLowSurrogate(c2)) {
9837 return toCodePoint(c1, c2);
9838 }
9839 }
9840 return c1;
9841 }
9842
9843 /**
9844 * Returns the code point preceding the given index of the
9845 * {@code CharSequence}. If the {@code char} value at
9846 * {@code (index - 1)} in the {@code CharSequence} is in
9847 * the low-surrogate range, {@code (index - 2)} is not
9848 * negative, and the {@code char} value at {@code (index - 2)}
9849 * in the {@code CharSequence} is in the
9850 * high-surrogate range, then the supplementary code point
9851 * corresponding to this surrogate pair is returned. Otherwise,
9852 * the {@code char} value at {@code (index - 1)} is
9853 * returned.
9854 *
9855 * @param seq the {@code CharSequence} instance
9856 * @param index the index following the code point that should be returned
9857 * @return the Unicode code point value before the given index.
9858 * @throws NullPointerException if {@code seq} is null.
9859 * @throws IndexOutOfBoundsException if the {@code index}
9860 * argument is less than 1 or greater than {@link
9861 * CharSequence#length() seq.length()}.
9862 * @since 1.5
9863 */
9864 public static int codePointBefore(CharSequence seq, int index) {
9865 char c2 = seq.charAt(--index);
9866 if (isLowSurrogate(c2) && index > 0) {
9867 char c1 = seq.charAt(--index);
9868 if (isHighSurrogate(c1)) {
9869 return toCodePoint(c1, c2);
9870 }
9871 }
9872 return c2;
9873 }
9874
9875 /**
9876 * Returns the code point preceding the given index of the
9877 * {@code char} array. If the {@code char} value at
9878 * {@code (index - 1)} in the {@code char} array is in
9879 * the low-surrogate range, {@code (index - 2)} is not
9880 * negative, and the {@code char} value at {@code (index - 2)}
9881 * in the {@code char} array is in the
9882 * high-surrogate range, then the supplementary code point
9883 * corresponding to this surrogate pair is returned. Otherwise,
9884 * the {@code char} value at {@code (index - 1)} is
9885 * returned.
9886 *
9887 * @param a the {@code char} array
9888 * @param index the index following the code point that should be returned
9889 * @return the Unicode code point value before the given index.
9890 * @throws NullPointerException if {@code a} is null.
9891 * @throws IndexOutOfBoundsException if the {@code index}
9892 * argument is less than 1 or greater than the length of the
9893 * {@code char} array
9894 * @since 1.5
9895 */
9896 public static int codePointBefore(char[] a, int index) {
9897 return codePointBeforeImpl(a, index, 0);
9898 }
9899
9900 /**
9901 * Returns the code point preceding the given index of the
9902 * {@code char} array, where only array elements with
9903 * {@code index} greater than or equal to {@code start}
9904 * can be used. If the {@code char} value at {@code (index - 1)}
9905 * in the {@code char} array is in the
9906 * low-surrogate range, {@code (index - 2)} is not less than
9907 * {@code start}, and the {@code char} value at
9908 * {@code (index - 2)} in the {@code char} array is in
9909 * the high-surrogate range, then the supplementary code point
9910 * corresponding to this surrogate pair is returned. Otherwise,
9911 * the {@code char} value at {@code (index - 1)} is
9912 * returned.
9913 *
9914 * @param a the {@code char} array
9915 * @param index the index following the code point that should be returned
9916 * @param start the index of the first array element in the
9917 * {@code char} array
9918 * @return the Unicode code point value before the given index.
9919 * @throws NullPointerException if {@code a} is null.
9920 * @throws IndexOutOfBoundsException if the {@code index}
9921 * argument is not greater than the {@code start} argument or
9922 * is greater than the length of the {@code char} array, or
9923 * if the {@code start} argument is negative or not less than
9924 * the length of the {@code char} array.
9925 * @since 1.5
9926 */
9927 public static int codePointBefore(char[] a, int index, int start) {
9928 if (index <= start || start < 0 || index > a.length) {
9929 throw new IndexOutOfBoundsException();
9930 }
9931 return codePointBeforeImpl(a, index, start);
9932 }
9933
9934 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
9935 static int codePointBeforeImpl(char[] a, int index, int start) {
9936 char c2 = a[--index];
9937 if (isLowSurrogate(c2) && index > start) {
9938 char c1 = a[--index];
9939 if (isHighSurrogate(c1)) {
9940 return toCodePoint(c1, c2);
9941 }
9942 }
9943 return c2;
9944 }
9945
9946 /**
9947 * Returns the leading surrogate (a
9948 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
9949 * high surrogate code unit</a>) of the
9950 * <a href="http://www.unicode.org/glossary/#surrogate_pair">
9951 * surrogate pair</a>
9952 * representing the specified supplementary character (Unicode
9953 * code point) in the UTF-16 encoding. If the specified character
9954 * is not a
9955 * <a href="Character.html#supplementary">supplementary character</a>,
9956 * an unspecified {@code char} is returned.
9957 *
9958 * <p>If
9959 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
9960 * is {@code true}, then
9961 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
9962 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
9963 * are also always {@code true}.
9964 *
9965 * @param codePoint a supplementary character (Unicode code point)
9966 * @return the leading surrogate code unit used to represent the
9967 * character in the UTF-16 encoding
9968 * @since 1.7
9969 */
9970 public static char highSurrogate(int codePoint) {
9971 return (char) ((codePoint >>> 10)
9972 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
9973 }
9974
9975 /**
9976 * Returns the trailing surrogate (a
9977 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
9978 * low surrogate code unit</a>) of the
9979 * <a href="http://www.unicode.org/glossary/#surrogate_pair">
9980 * surrogate pair</a>
9981 * representing the specified supplementary character (Unicode
9982 * code point) in the UTF-16 encoding. If the specified character
9983 * is not a
9984 * <a href="Character.html#supplementary">supplementary character</a>,
9985 * an unspecified {@code char} is returned.
9986 *
9987 * <p>If
9988 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
9989 * is {@code true}, then
9990 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
9991 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
9992 * are also always {@code true}.
9993 *
9994 * @param codePoint a supplementary character (Unicode code point)
9995 * @return the trailing surrogate code unit used to represent the
9996 * character in the UTF-16 encoding
9997 * @since 1.7
9998 */
9999 public static char lowSurrogate(int codePoint) {
10000 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
10001 }
10002
10003 /**
10004 * Converts the specified character (Unicode code point) to its
10005 * UTF-16 representation. If the specified code point is a BMP
10006 * (Basic Multilingual Plane or Plane 0) value, the same value is
10007 * stored in {@code dst[dstIndex]}, and 1 is returned. If the
10008 * specified code point is a supplementary character, its
10009 * surrogate values are stored in {@code dst[dstIndex]}
10010 * (high-surrogate) and {@code dst[dstIndex+1]}
10011 * (low-surrogate), and 2 is returned.
10012 *
10013 * @param codePoint the character (Unicode code point) to be converted.
10014 * @param dst an array of {@code char} in which the
10015 * {@code codePoint}'s UTF-16 value is stored.
10016 * @param dstIndex the start index into the {@code dst}
10017 * array where the converted value is stored.
10018 * @return 1 if the code point is a BMP code point, 2 if the
10019 * code point is a supplementary code point.
10020 * @throws IllegalArgumentException if the specified
10021 * {@code codePoint} is not a valid Unicode code point.
10022 * @throws NullPointerException if the specified {@code dst} is null.
10023 * @throws IndexOutOfBoundsException if {@code dstIndex}
10024 * is negative or not less than {@code dst.length}, or if
10025 * {@code dst} at {@code dstIndex} doesn't have enough
10026 * array element(s) to store the resulting {@code char}
10027 * value(s). (If {@code dstIndex} is equal to
10028 * {@code dst.length-1} and the specified
10029 * {@code codePoint} is a supplementary character, the
10030 * high-surrogate value is not stored in
10031 * {@code dst[dstIndex]}.)
10032 * @since 1.5
10033 */
10034 public static int toChars(int codePoint, char[] dst, int dstIndex) {
10035 if (isBmpCodePoint(codePoint)) {
10036 dst[dstIndex] = (char) codePoint;
10037 return 1;
10038 } else if (isValidCodePoint(codePoint)) {
10039 toSurrogates(codePoint, dst, dstIndex);
10040 return 2;
10041 } else {
10042 throw new IllegalArgumentException(
10043 String.format("Not a valid Unicode code point: 0x%X", codePoint));
10044 }
10045 }
10046
10047 /**
10048 * Converts the specified character (Unicode code point) to its
10049 * UTF-16 representation stored in a {@code char} array. If
10050 * the specified code point is a BMP (Basic Multilingual Plane or
10051 * Plane 0) value, the resulting {@code char} array has
10052 * the same value as {@code codePoint}. If the specified code
10053 * point is a supplementary code point, the resulting
10054 * {@code char} array has the corresponding surrogate pair.
10055 *
10056 * @param codePoint a Unicode code point
10057 * @return a {@code char} array having
10058 * {@code codePoint}'s UTF-16 representation.
10059 * @throws IllegalArgumentException if the specified
10060 * {@code codePoint} is not a valid Unicode code point.
10061 * @since 1.5
10062 */
10063 public static char[] toChars(int codePoint) {
10064 if (isBmpCodePoint(codePoint)) {
10065 return new char[] { (char) codePoint };
10066 } else if (isValidCodePoint(codePoint)) {
10067 char[] result = new char[2];
10068 toSurrogates(codePoint, result, 0);
10069 return result;
10070 } else {
10071 throw new IllegalArgumentException(
10072 String.format("Not a valid Unicode code point: 0x%X", codePoint));
10073 }
10074 }
10075
10076 static void toSurrogates(int codePoint, char[] dst, int index) {
10077 // We write elements "backwards" to guarantee all-or-nothing
10078 dst[index+1] = lowSurrogate(codePoint);
10079 dst[index] = highSurrogate(codePoint);
10080 }
10081
10082 /**
10083 * Returns the number of Unicode code points in the text range of
10084 * the specified char sequence. The text range begins at the
10085 * specified {@code beginIndex} and extends to the
10086 * {@code char} at index {@code endIndex - 1}. Thus the
10087 * length (in {@code char}s) of the text range is
10088 * {@code endIndex-beginIndex}. Unpaired surrogates within
10089 * the text range count as one code point each.
10090 *
10091 * @param seq the char sequence
10092 * @param beginIndex the index to the first {@code char} of
10093 * the text range.
10094 * @param endIndex the index after the last {@code char} of
10095 * the text range.
10096 * @return the number of Unicode code points in the specified text
10097 * range
10098 * @throws NullPointerException if {@code seq} is null.
10099 * @throws IndexOutOfBoundsException if the
10100 * {@code beginIndex} is negative, or {@code endIndex}
10101 * is larger than the length of the given sequence, or
10102 * {@code beginIndex} is larger than {@code endIndex}.
10103 * @since 1.5
10104 */
10105 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
10106 Objects.checkFromToIndex(beginIndex, endIndex, seq.length());
10107 int n = endIndex - beginIndex;
10108 for (int i = beginIndex; i < endIndex; ) {
10109 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
10110 isLowSurrogate(seq.charAt(i))) {
10111 n--;
10112 i++;
10113 }
10114 }
10115 return n;
10116 }
10117
10118 /**
10119 * Returns the number of Unicode code points in a subarray of the
10120 * {@code char} array argument. The {@code offset}
10121 * argument is the index of the first {@code char} of the
10122 * subarray and the {@code count} argument specifies the
10123 * length of the subarray in {@code char}s. Unpaired
10124 * surrogates within the subarray count as one code point each.
10125 *
10126 * @param a the {@code char} array
10127 * @param offset the index of the first {@code char} in the
10128 * given {@code char} array
10129 * @param count the length of the subarray in {@code char}s
10130 * @return the number of Unicode code points in the specified subarray
10131 * @throws NullPointerException if {@code a} is null.
10132 * @throws IndexOutOfBoundsException if {@code offset} or
10133 * {@code count} is negative, or if {@code offset +
10134 * count} is larger than the length of the given array.
10135 * @since 1.5
10136 */
10137 public static int codePointCount(char[] a, int offset, int count) {
10138 Objects.checkFromIndexSize(offset, count, a.length);
10139 return codePointCountImpl(a, offset, count);
10140 }
10141
10142 static int codePointCountImpl(char[] a, int offset, int count) {
10143 int endIndex = offset + count;
10144 int n = count;
10145 for (int i = offset; i < endIndex; ) {
10146 if (isHighSurrogate(a[i++]) && i < endIndex &&
10147 isLowSurrogate(a[i])) {
10148 n--;
10149 i++;
10150 }
10151 }
10152 return n;
10153 }
10154
10155 /**
10156 * Returns the index within the given char sequence that is offset
10157 * from the given {@code index} by {@code codePointOffset}
10158 * code points. Unpaired surrogates within the text range given by
10159 * {@code index} and {@code codePointOffset} count as
10160 * one code point each.
10161 *
10162 * @param seq the char sequence
10163 * @param index the index to be offset
10164 * @param codePointOffset the offset in code points
10165 * @return the index within the char sequence
10166 * @throws NullPointerException if {@code seq} is null.
10167 * @throws IndexOutOfBoundsException if {@code index}
10168 * is negative or larger than the length of the char sequence,
10169 * or if {@code codePointOffset} is positive and the
10170 * subsequence starting with {@code index} has fewer than
10171 * {@code codePointOffset} code points, or if
10172 * {@code codePointOffset} is negative and the subsequence
10173 * before {@code index} has fewer than the absolute value
10174 * of {@code codePointOffset} code points.
10175 * @since 1.5
10176 */
10177 public static int offsetByCodePoints(CharSequence seq, int index,
10178 int codePointOffset) {
10179 int length = seq.length();
10180 if (index < 0 || index > length) {
10181 throw new IndexOutOfBoundsException();
10182 }
10183
10184 int x = index;
10185 if (codePointOffset >= 0) {
10186 int i;
10187 for (i = 0; x < length && i < codePointOffset; i++) {
10188 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
10189 isLowSurrogate(seq.charAt(x))) {
10190 x++;
10191 }
10192 }
10193 if (i < codePointOffset) {
10194 throw new IndexOutOfBoundsException();
10195 }
10196 } else {
10197 int i;
10198 for (i = codePointOffset; x > 0 && i < 0; i++) {
10199 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
10200 isHighSurrogate(seq.charAt(x-1))) {
10201 x--;
10202 }
10203 }
10204 if (i < 0) {
10205 throw new IndexOutOfBoundsException();
10206 }
10207 }
10208 return x;
10209 }
10210
10211 /**
10212 * Returns the index within the given {@code char} subarray
10213 * that is offset from the given {@code index} by
10214 * {@code codePointOffset} code points. The
10215 * {@code start} and {@code count} arguments specify a
10216 * subarray of the {@code char} array. Unpaired surrogates
10217 * within the text range given by {@code index} and
10218 * {@code codePointOffset} count as one code point each.
10219 *
10220 * @param a the {@code char} array
10221 * @param start the index of the first {@code char} of the
10222 * subarray
10223 * @param count the length of the subarray in {@code char}s
10224 * @param index the index to be offset
10225 * @param codePointOffset the offset in code points
10226 * @return the index within the subarray
10227 * @throws NullPointerException if {@code a} is null.
10228 * @throws IndexOutOfBoundsException
10229 * if {@code start} or {@code count} is negative,
10230 * or if {@code start + count} is larger than the length of
10231 * the given array,
10232 * or if {@code index} is less than {@code start} or
10233 * larger than {@code start + count},
10234 * or if {@code codePointOffset} is positive and the text range
10235 * starting with {@code index} and ending with {@code start + count - 1}
10236 * has fewer than {@code codePointOffset} code
10237 * points,
10238 * or if {@code codePointOffset} is negative and the text range
10239 * starting with {@code start} and ending with {@code index - 1}
10240 * has fewer than the absolute value of
10241 * {@code codePointOffset} code points.
10242 * @since 1.5
10243 */
10244 public static int offsetByCodePoints(char[] a, int start, int count,
10245 int index, int codePointOffset) {
10246 if (count > a.length-start || start < 0 || count < 0
10247 || index < start || index > start+count) {
10248 throw new IndexOutOfBoundsException();
10249 }
10250 return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
10251 }
10252
10253 static int offsetByCodePointsImpl(char[]a, int start, int count,
10254 int index, int codePointOffset) {
10255 int x = index;
10256 if (codePointOffset >= 0) {
10257 int limit = start + count;
10258 int i;
10259 for (i = 0; x < limit && i < codePointOffset; i++) {
10260 if (isHighSurrogate(a[x++]) && x < limit &&
10261 isLowSurrogate(a[x])) {
10262 x++;
10263 }
10264 }
10265 if (i < codePointOffset) {
10266 throw new IndexOutOfBoundsException();
10267 }
10268 } else {
10269 int i;
10270 for (i = codePointOffset; x > start && i < 0; i++) {
10271 if (isLowSurrogate(a[--x]) && x > start &&
10272 isHighSurrogate(a[x-1])) {
10273 x--;
10274 }
10275 }
10276 if (i < 0) {
10277 throw new IndexOutOfBoundsException();
10278 }
10279 }
10280 return x;
10281 }
10282
10283 /**
10284 * Determines if the specified character is a lowercase character.
10285 * <p>
10286 * A character is lowercase if its general category type, provided
10287 * by {@code Character.getType(ch)}, is
10288 * {@code LOWERCASE_LETTER}, or it has contributory property
10289 * Other_Lowercase as defined by the Unicode Standard.
10290 * <p>
10291 * The following are examples of lowercase characters:
10292 * <blockquote><pre>
10293 * a b c d e f g h i j k l m n o p q r s t u v w x y z
10294 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
10295 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
10296 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
10297 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
10298 * </pre></blockquote>
10299 * <p> Many other Unicode characters are lowercase too.
10300 *
10301 * <p><b>Note:</b> This method cannot handle <a
10302 * href="#supplementary"> supplementary characters</a>. To support
10303 * all Unicode characters, including supplementary characters, use
10304 * the {@link #isLowerCase(int)} method.
10305 *
10306 * @param ch the character to be tested.
10307 * @return {@code true} if the character is lowercase;
10308 * {@code false} otherwise.
10309 * @see Character#isUpperCase(char)
10310 * @see Character#isTitleCase(char)
10311 * @see Character#toLowerCase(char)
10312 * @see Character#getType(char)
10313 */
10314 public static boolean isLowerCase(char ch) {
10315 return isLowerCase((int)ch);
10316 }
10317
10318 /**
10319 * Determines if the specified character (Unicode code point) is a
10320 * lowercase character.
10321 * <p>
10322 * A character is lowercase if its general category type, provided
10323 * by {@link Character#getType getType(codePoint)}, is
10324 * {@code LOWERCASE_LETTER}, or it has contributory property
10325 * Other_Lowercase as defined by the Unicode Standard.
10326 * <p>
10327 * The following are examples of lowercase characters:
10328 * <blockquote><pre>
10329 * a b c d e f g h i j k l m n o p q r s t u v w x y z
10330 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
10331 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
10332 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
10333 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
10334 * </pre></blockquote>
10335 * <p> Many other Unicode characters are lowercase too.
10336 *
10337 * @param codePoint the character (Unicode code point) to be tested.
10338 * @return {@code true} if the character is lowercase;
10339 * {@code false} otherwise.
10340 * @see Character#isUpperCase(int)
10341 * @see Character#isTitleCase(int)
10342 * @see Character#toLowerCase(int)
10343 * @see Character#getType(int)
10344 * @since 1.5
10345 */
10346 public static boolean isLowerCase(int codePoint) {
10347 return CharacterData.of(codePoint).isLowerCase(codePoint);
10348 }
10349
10350 /**
10351 * Determines if the specified character is an uppercase character.
10352 * <p>
10353 * A character is uppercase if its general category type, provided by
10354 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
10355 * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
10356 * <p>
10357 * The following are examples of uppercase characters:
10358 * <blockquote><pre>
10359 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
10360 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
10361 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
10362 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
10363 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
10364 * </pre></blockquote>
10365 * <p> Many other Unicode characters are uppercase too.
10366 *
10367 * <p><b>Note:</b> This method cannot handle <a
10368 * href="#supplementary"> supplementary characters</a>. To support
10369 * all Unicode characters, including supplementary characters, use
10370 * the {@link #isUpperCase(int)} method.
10371 *
10372 * @param ch the character to be tested.
10373 * @return {@code true} if the character is uppercase;
10374 * {@code false} otherwise.
10375 * @see Character#isLowerCase(char)
10376 * @see Character#isTitleCase(char)
10377 * @see Character#toUpperCase(char)
10378 * @see Character#getType(char)
10379 * @since 1.0
10380 */
10381 public static boolean isUpperCase(char ch) {
10382 return isUpperCase((int)ch);
10383 }
10384
10385 /**
10386 * Determines if the specified character (Unicode code point) is an uppercase character.
10387 * <p>
10388 * A character is uppercase if its general category type, provided by
10389 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
10390 * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
10391 * <p>
10392 * The following are examples of uppercase characters:
10393 * <blockquote><pre>
10394 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
10395 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
10396 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
10397 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
10398 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
10399 * </pre></blockquote>
10400 * <p> Many other Unicode characters are uppercase too.
10401 *
10402 * @param codePoint the character (Unicode code point) to be tested.
10403 * @return {@code true} if the character is uppercase;
10404 * {@code false} otherwise.
10405 * @see Character#isLowerCase(int)
10406 * @see Character#isTitleCase(int)
10407 * @see Character#toUpperCase(int)
10408 * @see Character#getType(int)
10409 * @since 1.5
10410 */
10411 public static boolean isUpperCase(int codePoint) {
10412 return CharacterData.of(codePoint).isUpperCase(codePoint);
10413 }
10414
10415 /**
10416 * Determines if the specified character is a titlecase character.
10417 * <p>
10418 * A character is a titlecase character if its general
10419 * category type, provided by {@code Character.getType(ch)},
10420 * is {@code TITLECASE_LETTER}.
10421 * <p>
10422 * Some characters look like pairs of Latin letters. For example, there
10423 * is an uppercase letter that looks like "LJ" and has a corresponding
10424 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
10425 * is the appropriate form to use when rendering a word in lowercase
10426 * with initial capitals, as for a book title.
10427 * <p>
10428 * These are some of the Unicode characters for which this method returns
10429 * {@code true}:
10430 * <ul>
10431 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
10432 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
10433 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
10434 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
10435 * </ul>
10436 * <p> Many other Unicode characters are titlecase too.
10437 *
10438 * <p><b>Note:</b> This method cannot handle <a
10439 * href="#supplementary"> supplementary characters</a>. To support
10440 * all Unicode characters, including supplementary characters, use
10441 * the {@link #isTitleCase(int)} method.
10442 *
10443 * @param ch the character to be tested.
10444 * @return {@code true} if the character is titlecase;
10445 * {@code false} otherwise.
10446 * @see Character#isLowerCase(char)
10447 * @see Character#isUpperCase(char)
10448 * @see Character#toTitleCase(char)
10449 * @see Character#getType(char)
10450 * @since 1.0.2
10451 */
10452 public static boolean isTitleCase(char ch) {
10453 return isTitleCase((int)ch);
10454 }
10455
10456 /**
10457 * Determines if the specified character (Unicode code point) is a titlecase character.
10458 * <p>
10459 * A character is a titlecase character if its general
10460 * category type, provided by {@link Character#getType(int) getType(codePoint)},
10461 * is {@code TITLECASE_LETTER}.
10462 * <p>
10463 * Some characters look like pairs of Latin letters. For example, there
10464 * is an uppercase letter that looks like "LJ" and has a corresponding
10465 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
10466 * is the appropriate form to use when rendering a word in lowercase
10467 * with initial capitals, as for a book title.
10468 * <p>
10469 * These are some of the Unicode characters for which this method returns
10470 * {@code true}:
10471 * <ul>
10472 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
10473 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
10474 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
10475 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
10476 * </ul>
10477 * <p> Many other Unicode characters are titlecase too.
10478 *
10479 * @param codePoint the character (Unicode code point) to be tested.
10480 * @return {@code true} if the character is titlecase;
10481 * {@code false} otherwise.
10482 * @see Character#isLowerCase(int)
10483 * @see Character#isUpperCase(int)
10484 * @see Character#toTitleCase(int)
10485 * @see Character#getType(int)
10486 * @since 1.5
10487 */
10488 public static boolean isTitleCase(int codePoint) {
10489 return getType(codePoint) == Character.TITLECASE_LETTER;
10490 }
10491
10492 /**
10493 * Determines if the specified character is a digit.
10494 * <p>
10495 * A character is a digit if its general category type, provided
10496 * by {@code Character.getType(ch)}, is
10497 * {@code DECIMAL_DIGIT_NUMBER}.
10498 * <p>
10499 * Some Unicode character ranges that contain digits:
10500 * <ul>
10501 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
10502 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
10503 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
10504 * Arabic-Indic digits
10505 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
10506 * Extended Arabic-Indic digits
10507 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
10508 * Devanagari digits
10509 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
10510 * Fullwidth digits
10511 * </ul>
10512 *
10513 * Many other character ranges contain digits as well.
10514 *
10515 * <p><b>Note:</b> This method cannot handle <a
10516 * href="#supplementary"> supplementary characters</a>. To support
10517 * all Unicode characters, including supplementary characters, use
10518 * the {@link #isDigit(int)} method.
10519 *
10520 * @param ch the character to be tested.
10521 * @return {@code true} if the character is a digit;
10522 * {@code false} otherwise.
10523 * @see Character#digit(char, int)
10524 * @see Character#forDigit(int, int)
10525 * @see Character#getType(char)
10526 */
10527 public static boolean isDigit(char ch) {
10528 return isDigit((int)ch);
10529 }
10530
10531 /**
10532 * Determines if the specified character (Unicode code point) is a digit.
10533 * <p>
10534 * A character is a digit if its general category type, provided
10535 * by {@link Character#getType(int) getType(codePoint)}, is
10536 * {@code DECIMAL_DIGIT_NUMBER}.
10537 * <p>
10538 * Some Unicode character ranges that contain digits:
10539 * <ul>
10540 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
10541 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
10542 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
10543 * Arabic-Indic digits
10544 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
10545 * Extended Arabic-Indic digits
10546 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
10547 * Devanagari digits
10548 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
10549 * Fullwidth digits
10550 * </ul>
10551 *
10552 * Many other character ranges contain digits as well.
10553 *
10554 * @param codePoint the character (Unicode code point) to be tested.
10555 * @return {@code true} if the character is a digit;
10556 * {@code false} otherwise.
10557 * @see Character#forDigit(int, int)
10558 * @see Character#getType(int)
10559 * @since 1.5
10560 */
10561 public static boolean isDigit(int codePoint) {
10562 return CharacterData.of(codePoint).isDigit(codePoint);
10563 }
10564
10565 /**
10566 * Determines if a character is defined in Unicode.
10567 * <p>
10568 * A character is defined if at least one of the following is true:
10569 * <ul>
10570 * <li>It has an entry in the UnicodeData file.
10571 * <li>It has a value in a range defined by the UnicodeData file.
10572 * </ul>
10573 *
10574 * <p><b>Note:</b> This method cannot handle <a
10575 * href="#supplementary"> supplementary characters</a>. To support
10576 * all Unicode characters, including supplementary characters, use
10577 * the {@link #isDefined(int)} method.
10578 *
10579 * @param ch the character to be tested
10580 * @return {@code true} if the character has a defined meaning
10581 * in Unicode; {@code false} otherwise.
10582 * @see Character#isDigit(char)
10583 * @see Character#isLetter(char)
10584 * @see Character#isLetterOrDigit(char)
10585 * @see Character#isLowerCase(char)
10586 * @see Character#isTitleCase(char)
10587 * @see Character#isUpperCase(char)
10588 * @since 1.0.2
10589 */
10590 public static boolean isDefined(char ch) {
10591 return isDefined((int)ch);
10592 }
10593
10594 /**
10595 * Determines if a character (Unicode code point) is defined in Unicode.
10596 * <p>
10597 * A character is defined if at least one of the following is true:
10598 * <ul>
10599 * <li>It has an entry in the UnicodeData file.
10600 * <li>It has a value in a range defined by the UnicodeData file.
10601 * </ul>
10602 *
10603 * @param codePoint the character (Unicode code point) to be tested.
10604 * @return {@code true} if the character has a defined meaning
10605 * in Unicode; {@code false} otherwise.
10606 * @see Character#isDigit(int)
10607 * @see Character#isLetter(int)
10608 * @see Character#isLetterOrDigit(int)
10609 * @see Character#isLowerCase(int)
10610 * @see Character#isTitleCase(int)
10611 * @see Character#isUpperCase(int)
10612 * @since 1.5
10613 */
10614 public static boolean isDefined(int codePoint) {
10615 return getType(codePoint) != Character.UNASSIGNED;
10616 }
10617
10618 /**
10619 * Determines if the specified character is a letter.
10620 * <p>
10621 * A character is considered to be a letter if its general
10622 * category type, provided by {@code Character.getType(ch)},
10623 * is any of the following:
10624 * <ul>
10625 * <li> {@code UPPERCASE_LETTER}
10626 * <li> {@code LOWERCASE_LETTER}
10627 * <li> {@code TITLECASE_LETTER}
10628 * <li> {@code MODIFIER_LETTER}
10629 * <li> {@code OTHER_LETTER}
10630 * </ul>
10631 *
10632 * Not all letters have case. Many characters are
10633 * letters but are neither uppercase nor lowercase nor titlecase.
10634 *
10635 * <p><b>Note:</b> This method cannot handle <a
10636 * href="#supplementary"> supplementary characters</a>. To support
10637 * all Unicode characters, including supplementary characters, use
10638 * the {@link #isLetter(int)} method.
10639 *
10640 * @param ch the character to be tested.
10641 * @return {@code true} if the character is a letter;
10642 * {@code false} otherwise.
10643 * @see Character#isDigit(char)
10644 * @see Character#isJavaIdentifierStart(char)
10645 * @see Character#isJavaLetter(char)
10646 * @see Character#isJavaLetterOrDigit(char)
10647 * @see Character#isLetterOrDigit(char)
10648 * @see Character#isLowerCase(char)
10649 * @see Character#isTitleCase(char)
10650 * @see Character#isUnicodeIdentifierStart(char)
10651 * @see Character#isUpperCase(char)
10652 */
10653 public static boolean isLetter(char ch) {
10654 return isLetter((int)ch);
10655 }
10656
10657 /**
10658 * Determines if the specified character (Unicode code point) is a letter.
10659 * <p>
10660 * A character is considered to be a letter if its general
10661 * category type, provided by {@link Character#getType(int) getType(codePoint)},
10662 * is any of the following:
10663 * <ul>
10664 * <li> {@code UPPERCASE_LETTER}
10665 * <li> {@code LOWERCASE_LETTER}
10666 * <li> {@code TITLECASE_LETTER}
10667 * <li> {@code MODIFIER_LETTER}
10668 * <li> {@code OTHER_LETTER}
10669 * </ul>
10670 *
10671 * Not all letters have case. Many characters are
10672 * letters but are neither uppercase nor lowercase nor titlecase.
10673 *
10674 * @param codePoint the character (Unicode code point) to be tested.
10675 * @return {@code true} if the character is a letter;
10676 * {@code false} otherwise.
10677 * @see Character#isDigit(int)
10678 * @see Character#isJavaIdentifierStart(int)
10679 * @see Character#isLetterOrDigit(int)
10680 * @see Character#isLowerCase(int)
10681 * @see Character#isTitleCase(int)
10682 * @see Character#isUnicodeIdentifierStart(int)
10683 * @see Character#isUpperCase(int)
10684 * @since 1.5
10685 */
10686 public static boolean isLetter(int codePoint) {
10687 return ((((1 << Character.UPPERCASE_LETTER) |
10688 (1 << Character.LOWERCASE_LETTER) |
10689 (1 << Character.TITLECASE_LETTER) |
10690 (1 << Character.MODIFIER_LETTER) |
10691 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
10692 != 0;
10693 }
10694
10695 /**
10696 * Determines if the specified character is a letter or digit.
10697 * <p>
10698 * A character is considered to be a letter or digit if either
10699 * {@code Character.isLetter(char ch)} or
10700 * {@code Character.isDigit(char ch)} returns
10701 * {@code true} for the character.
10702 *
10703 * <p><b>Note:</b> This method cannot handle <a
10704 * href="#supplementary"> supplementary characters</a>. To support
10705 * all Unicode characters, including supplementary characters, use
10706 * the {@link #isLetterOrDigit(int)} method.
10707 *
10708 * @param ch the character to be tested.
10709 * @return {@code true} if the character is a letter or digit;
10710 * {@code false} otherwise.
10711 * @see Character#isDigit(char)
10712 * @see Character#isJavaIdentifierPart(char)
10713 * @see Character#isJavaLetter(char)
10714 * @see Character#isJavaLetterOrDigit(char)
10715 * @see Character#isLetter(char)
10716 * @see Character#isUnicodeIdentifierPart(char)
10717 * @since 1.0.2
10718 */
10719 public static boolean isLetterOrDigit(char ch) {
10720 return isLetterOrDigit((int)ch);
10721 }
10722
10723 /**
10724 * Determines if the specified character (Unicode code point) is a letter or digit.
10725 * <p>
10726 * A character is considered to be a letter or digit if either
10727 * {@link #isLetter(int) isLetter(codePoint)} or
10728 * {@link #isDigit(int) isDigit(codePoint)} returns
10729 * {@code true} for the character.
10730 *
10731 * @param codePoint the character (Unicode code point) to be tested.
10732 * @return {@code true} if the character is a letter or digit;
10733 * {@code false} otherwise.
10734 * @see Character#isDigit(int)
10735 * @see Character#isJavaIdentifierPart(int)
10736 * @see Character#isLetter(int)
10737 * @see Character#isUnicodeIdentifierPart(int)
10738 * @since 1.5
10739 */
10740 public static boolean isLetterOrDigit(int codePoint) {
10741 return ((((1 << Character.UPPERCASE_LETTER) |
10742 (1 << Character.LOWERCASE_LETTER) |
10743 (1 << Character.TITLECASE_LETTER) |
10744 (1 << Character.MODIFIER_LETTER) |
10745 (1 << Character.OTHER_LETTER) |
10746 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
10747 != 0;
10748 }
10749
10750 /**
10751 * Determines if the specified character is permissible as the first
10752 * character in a Java identifier.
10753 * <p>
10754 * A character may start a Java identifier if and only if
10755 * one of the following conditions is true:
10756 * <ul>
10757 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10758 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
10759 * <li> {@code ch} is a currency symbol (such as {@code '$'})
10760 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
10761 * </ul>
10762 *
10763 * @param ch the character to be tested.
10764 * @return {@code true} if the character may start a Java
10765 * identifier; {@code false} otherwise.
10766 * @see Character#isJavaLetterOrDigit(char)
10767 * @see Character#isJavaIdentifierStart(char)
10768 * @see Character#isJavaIdentifierPart(char)
10769 * @see Character#isLetter(char)
10770 * @see Character#isLetterOrDigit(char)
10771 * @see Character#isUnicodeIdentifierStart(char)
10772 * @since 1.0.2
10773 * @deprecated Replaced by isJavaIdentifierStart(char).
10774 */
10775 @Deprecated(since="1.1")
10776 public static boolean isJavaLetter(char ch) {
10777 return isJavaIdentifierStart(ch);
10778 }
10779
10780 /**
10781 * Determines if the specified character may be part of a Java
10782 * identifier as other than the first character.
10783 * <p>
10784 * A character may be part of a Java identifier if and only if one
10785 * of the following conditions is true:
10786 * <ul>
10787 * <li> it is a letter
10788 * <li> it is a currency symbol (such as {@code '$'})
10789 * <li> it is a connecting punctuation character (such as {@code '_'})
10790 * <li> it is a digit
10791 * <li> it is a numeric letter (such as a Roman numeral character)
10792 * <li> it is a combining mark
10793 * <li> it is a non-spacing mark
10794 * <li> {@code isIdentifierIgnorable} returns
10795 * {@code true} for the character.
10796 * </ul>
10797 *
10798 * @param ch the character to be tested.
10799 * @return {@code true} if the character may be part of a
10800 * Java identifier; {@code false} otherwise.
10801 * @see Character#isJavaLetter(char)
10802 * @see Character#isJavaIdentifierStart(char)
10803 * @see Character#isJavaIdentifierPart(char)
10804 * @see Character#isLetter(char)
10805 * @see Character#isLetterOrDigit(char)
10806 * @see Character#isUnicodeIdentifierPart(char)
10807 * @see Character#isIdentifierIgnorable(char)
10808 * @since 1.0.2
10809 * @deprecated Replaced by isJavaIdentifierPart(char).
10810 */
10811 @Deprecated(since="1.1")
10812 public static boolean isJavaLetterOrDigit(char ch) {
10813 return isJavaIdentifierPart(ch);
10814 }
10815
10816 /**
10817 * Determines if the specified character (Unicode code point) is alphabetic.
10818 * <p>
10819 * A character is considered to be alphabetic if its general category type,
10820 * provided by {@link Character#getType(int) getType(codePoint)}, is any of
10821 * the following:
10822 * <ul>
10823 * <li> {@code UPPERCASE_LETTER}
10824 * <li> {@code LOWERCASE_LETTER}
10825 * <li> {@code TITLECASE_LETTER}
10826 * <li> {@code MODIFIER_LETTER}
10827 * <li> {@code OTHER_LETTER}
10828 * <li> {@code LETTER_NUMBER}
10829 * </ul>
10830 * or it has contributory property Other_Alphabetic as defined by the
10831 * Unicode Standard.
10832 *
10833 * @param codePoint the character (Unicode code point) to be tested.
10834 * @return {@code true} if the character is a Unicode alphabet
10835 * character, {@code false} otherwise.
10836 * @since 1.7
10837 */
10838 public static boolean isAlphabetic(int codePoint) {
10839 return (((((1 << Character.UPPERCASE_LETTER) |
10840 (1 << Character.LOWERCASE_LETTER) |
10841 (1 << Character.TITLECASE_LETTER) |
10842 (1 << Character.MODIFIER_LETTER) |
10843 (1 << Character.OTHER_LETTER) |
10844 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
10845 CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
10846 }
10847
10848 /**
10849 * Determines if the specified character (Unicode code point) is a CJKV
10850 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
10851 * the Unicode Standard.
10852 *
10853 * @param codePoint the character (Unicode code point) to be tested.
10854 * @return {@code true} if the character is a Unicode ideograph
10855 * character, {@code false} otherwise.
10856 * @since 1.7
10857 */
10858 public static boolean isIdeographic(int codePoint) {
10859 return CharacterData.of(codePoint).isIdeographic(codePoint);
10860 }
10861
10862 /**
10863 * Determines if the specified character is
10864 * permissible as the first character in a Java identifier.
10865 * <p>
10866 * A character may start a Java identifier if and only if
10867 * one of the following conditions is true:
10868 * <ul>
10869 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10870 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
10871 * <li> {@code ch} is a currency symbol (such as {@code '$'})
10872 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
10873 * </ul>
10874 *
10875 * <p><b>Note:</b> This method cannot handle <a
10876 * href="#supplementary"> supplementary characters</a>. To support
10877 * all Unicode characters, including supplementary characters, use
10878 * the {@link #isJavaIdentifierStart(int)} method.
10879 *
10880 * @param ch the character to be tested.
10881 * @return {@code true} if the character may start a Java identifier;
10882 * {@code false} otherwise.
10883 * @see Character#isJavaIdentifierPart(char)
10884 * @see Character#isLetter(char)
10885 * @see Character#isUnicodeIdentifierStart(char)
10886 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10887 * @since 1.1
10888 */
10889 @SuppressWarnings("doclint:reference") // cross-module links
10890 public static boolean isJavaIdentifierStart(char ch) {
10891 return isJavaIdentifierStart((int)ch);
10892 }
10893
10894 /**
10895 * Determines if the character (Unicode code point) is
10896 * permissible as the first character in a Java identifier.
10897 * <p>
10898 * A character may start a Java identifier if and only if
10899 * one of the following conditions is true:
10900 * <ul>
10901 * <li> {@link #isLetter(int) isLetter(codePoint)}
10902 * returns {@code true}
10903 * <li> {@link #getType(int) getType(codePoint)}
10904 * returns {@code LETTER_NUMBER}
10905 * <li> the referenced character is a currency symbol (such as {@code '$'})
10906 * <li> the referenced character is a connecting punctuation character
10907 * (such as {@code '_'}).
10908 * </ul>
10909 *
10910 * @param codePoint the character (Unicode code point) to be tested.
10911 * @return {@code true} if the character may start a Java identifier;
10912 * {@code false} otherwise.
10913 * @see Character#isJavaIdentifierPart(int)
10914 * @see Character#isLetter(int)
10915 * @see Character#isUnicodeIdentifierStart(int)
10916 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10917 * @since 1.5
10918 */
10919 @SuppressWarnings("doclint:reference") // cross-module links
10920 public static boolean isJavaIdentifierStart(int codePoint) {
10921 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
10922 }
10923
10924 /**
10925 * Determines if the specified character may be part of a Java
10926 * identifier as other than the first character.
10927 * <p>
10928 * A character may be part of a Java identifier if any of the following
10929 * conditions are true:
10930 * <ul>
10931 * <li> it is a letter
10932 * <li> it is a currency symbol (such as {@code '$'})
10933 * <li> it is a connecting punctuation character (such as {@code '_'})
10934 * <li> it is a digit
10935 * <li> it is a numeric letter (such as a Roman numeral character)
10936 * <li> it is a combining mark
10937 * <li> it is a non-spacing mark
10938 * <li> {@code isIdentifierIgnorable} returns
10939 * {@code true} for the character
10940 * </ul>
10941 *
10942 * <p><b>Note:</b> This method cannot handle <a
10943 * href="#supplementary"> supplementary characters</a>. To support
10944 * all Unicode characters, including supplementary characters, use
10945 * the {@link #isJavaIdentifierPart(int)} method.
10946 *
10947 * @param ch the character to be tested.
10948 * @return {@code true} if the character may be part of a
10949 * Java identifier; {@code false} otherwise.
10950 * @see Character#isIdentifierIgnorable(char)
10951 * @see Character#isJavaIdentifierStart(char)
10952 * @see Character#isLetterOrDigit(char)
10953 * @see Character#isUnicodeIdentifierPart(char)
10954 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10955 * @since 1.1
10956 */
10957 @SuppressWarnings("doclint:reference") // cross-module links
10958 public static boolean isJavaIdentifierPart(char ch) {
10959 return isJavaIdentifierPart((int)ch);
10960 }
10961
10962 /**
10963 * Determines if the character (Unicode code point) may be part of a Java
10964 * identifier as other than the first character.
10965 * <p>
10966 * A character may be part of a Java identifier if any of the following
10967 * conditions are true:
10968 * <ul>
10969 * <li> it is a letter
10970 * <li> it is a currency symbol (such as {@code '$'})
10971 * <li> it is a connecting punctuation character (such as {@code '_'})
10972 * <li> it is a digit
10973 * <li> it is a numeric letter (such as a Roman numeral character)
10974 * <li> it is a combining mark
10975 * <li> it is a non-spacing mark
10976 * <li> {@link #isIdentifierIgnorable(int)
10977 * isIdentifierIgnorable(codePoint)} returns {@code true} for
10978 * the code point
10979 * </ul>
10980 *
10981 * @param codePoint the character (Unicode code point) to be tested.
10982 * @return {@code true} if the character may be part of a
10983 * Java identifier; {@code false} otherwise.
10984 * @see Character#isIdentifierIgnorable(int)
10985 * @see Character#isJavaIdentifierStart(int)
10986 * @see Character#isLetterOrDigit(int)
10987 * @see Character#isUnicodeIdentifierPart(int)
10988 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10989 * @since 1.5
10990 */
10991 @SuppressWarnings("doclint:reference") // cross-module links
10992 public static boolean isJavaIdentifierPart(int codePoint) {
10993 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
10994 }
10995
10996 /**
10997 * Determines if the specified character is permissible as the
10998 * first character in a Unicode identifier.
10999 * <p>
11000 * A character may start a Unicode identifier if and only if
11001 * one of the following conditions is true:
11002 * <ul>
11003 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
11004 * <li> {@link #getType(char) getType(ch)} returns
11005 * {@code LETTER_NUMBER}.
11006 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
11007 * {@code Other_ID_Start}</a> character.
11008 * </ul>
11009 * <p>
11010 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
11011 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
11012 * with the following profile of UAX31:
11013 * <pre>
11014 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
11015 * </pre>
11016 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
11017 * compatibility.
11018 *
11019 * <p><b>Note:</b> This method cannot handle <a
11020 * href="#supplementary"> supplementary characters</a>. To support
11021 * all Unicode characters, including supplementary characters, use
11022 * the {@link #isUnicodeIdentifierStart(int)} method.
11023 *
11024 * @param ch the character to be tested.
11025 * @return {@code true} if the character may start a Unicode
11026 * identifier; {@code false} otherwise.
11027 *
11028 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
11029 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
11030 * @see Character#isJavaIdentifierStart(char)
11031 * @see Character#isLetter(char)
11032 * @see Character#isUnicodeIdentifierPart(char)
11033 * @since 1.1
11034 */
11035 public static boolean isUnicodeIdentifierStart(char ch) {
11036 return isUnicodeIdentifierStart((int)ch);
11037 }
11038
11039 /**
11040 * Determines if the specified character (Unicode code point) is permissible as the
11041 * first character in a Unicode identifier.
11042 * <p>
11043 * A character may start a Unicode identifier if and only if
11044 * one of the following conditions is true:
11045 * <ul>
11046 * <li> {@link #isLetter(int) isLetter(codePoint)}
11047 * returns {@code true}
11048 * <li> {@link #getType(int) getType(codePoint)}
11049 * returns {@code LETTER_NUMBER}.
11050 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
11051 * {@code Other_ID_Start}</a> character.
11052 * </ul>
11053 * <p>
11054 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
11055 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
11056 * with the following profile of UAX31:
11057 * <pre>
11058 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
11059 * </pre>
11060 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
11061 * compatibility.
11062 *
11063 * @param codePoint the character (Unicode code point) to be tested.
11064 * @return {@code true} if the character may start a Unicode
11065 * identifier; {@code false} otherwise.
11066 *
11067 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
11068 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
11069 * @see Character#isJavaIdentifierStart(int)
11070 * @see Character#isLetter(int)
11071 * @see Character#isUnicodeIdentifierPart(int)
11072 * @since 1.5
11073 */
11074 public static boolean isUnicodeIdentifierStart(int codePoint) {
11075 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
11076 }
11077
11078 /**
11079 * Determines if the specified character may be part of a Unicode
11080 * identifier as other than the first character.
11081 * <p>
11082 * A character may be part of a Unicode identifier if and only if
11083 * one of the following statements is true:
11084 * <ul>
11085 * <li> it is a letter
11086 * <li> it is a connecting punctuation character (such as {@code '_'})
11087 * <li> it is a digit
11088 * <li> it is a numeric letter (such as a Roman numeral character)
11089 * <li> it is a combining mark
11090 * <li> it is a non-spacing mark
11091 * <li> {@code isIdentifierIgnorable} returns
11092 * {@code true} for this character.
11093 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
11094 * {@code Other_ID_Start}</a> character.
11095 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
11096 * {@code Other_ID_Continue}</a> character.
11097 * </ul>
11098 * <p>
11099 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
11100 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
11101 * with the following profile of UAX31:
11102 * <pre>
11103 * Continue := Start + ID_Continue + ignorable
11104 * Medial := empty
11105 * ignorable := isIdentifierIgnorable(char) returns true for the character
11106 * </pre>
11107 * {@code ignorable} is added to {@code Continue} for backward
11108 * compatibility.
11109 *
11110 * <p><b>Note:</b> This method cannot handle <a
11111 * href="#supplementary"> supplementary characters</a>. To support
11112 * all Unicode characters, including supplementary characters, use
11113 * the {@link #isUnicodeIdentifierPart(int)} method.
11114 *
11115 * @param ch the character to be tested.
11116 * @return {@code true} if the character may be part of a
11117 * Unicode identifier; {@code false} otherwise.
11118 *
11119 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
11120 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
11121 * @see Character#isIdentifierIgnorable(char)
11122 * @see Character#isJavaIdentifierPart(char)
11123 * @see Character#isLetterOrDigit(char)
11124 * @see Character#isUnicodeIdentifierStart(char)
11125 * @since 1.1
11126 */
11127 public static boolean isUnicodeIdentifierPart(char ch) {
11128 return isUnicodeIdentifierPart((int)ch);
11129 }
11130
11131 /**
11132 * Determines if the specified character (Unicode code point) may be part of a Unicode
11133 * identifier as other than the first character.
11134 * <p>
11135 * A character may be part of a Unicode identifier if and only if
11136 * one of the following statements is true:
11137 * <ul>
11138 * <li> it is a letter
11139 * <li> it is a connecting punctuation character (such as {@code '_'})
11140 * <li> it is a digit
11141 * <li> it is a numeric letter (such as a Roman numeral character)
11142 * <li> it is a combining mark
11143 * <li> it is a non-spacing mark
11144 * <li> {@code isIdentifierIgnorable} returns
11145 * {@code true} for this character.
11146 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
11147 * {@code Other_ID_Start}</a> character.
11148 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
11149 * {@code Other_ID_Continue}</a> character.
11150 * </ul>
11151 * <p>
11152 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
11153 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
11154 * with the following profile of UAX31:
11155 * <pre>
11156 * Continue := Start + ID_Continue + ignorable
11157 * Medial := empty
11158 * ignorable := isIdentifierIgnorable(int) returns true for the character
11159 * </pre>
11160 * {@code ignorable} is added to {@code Continue} for backward
11161 * compatibility.
11162 *
11163 * @param codePoint the character (Unicode code point) to be tested.
11164 * @return {@code true} if the character may be part of a
11165 * Unicode identifier; {@code false} otherwise.
11166 *
11167 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
11168 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
11169 * @see Character#isIdentifierIgnorable(int)
11170 * @see Character#isJavaIdentifierPart(int)
11171 * @see Character#isLetterOrDigit(int)
11172 * @see Character#isUnicodeIdentifierStart(int)
11173 * @since 1.5
11174 */
11175 public static boolean isUnicodeIdentifierPart(int codePoint) {
11176 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
11177 }
11178
11179 /**
11180 * Determines if the specified character should be regarded as
11181 * an ignorable character in a Java identifier or a Unicode identifier.
11182 * <p>
11183 * The following Unicode characters are ignorable in a Java identifier
11184 * or a Unicode identifier:
11185 * <ul>
11186 * <li>ISO control characters that are not whitespace
11187 * <ul>
11188 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
11189 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
11190 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
11191 * </ul>
11192 *
11193 * <li>all characters that have the {@code FORMAT} general
11194 * category value
11195 * </ul>
11196 *
11197 * <p><b>Note:</b> This method cannot handle <a
11198 * href="#supplementary"> supplementary characters</a>. To support
11199 * all Unicode characters, including supplementary characters, use
11200 * the {@link #isIdentifierIgnorable(int)} method.
11201 *
11202 * @param ch the character to be tested.
11203 * @return {@code true} if the character is an ignorable control
11204 * character that may be part of a Java or Unicode identifier;
11205 * {@code false} otherwise.
11206 * @see Character#isJavaIdentifierPart(char)
11207 * @see Character#isUnicodeIdentifierPart(char)
11208 * @since 1.1
11209 */
11210 public static boolean isIdentifierIgnorable(char ch) {
11211 return isIdentifierIgnorable((int)ch);
11212 }
11213
11214 /**
11215 * Determines if the specified character (Unicode code point) should be regarded as
11216 * an ignorable character in a Java identifier or a Unicode identifier.
11217 * <p>
11218 * The following Unicode characters are ignorable in a Java identifier
11219 * or a Unicode identifier:
11220 * <ul>
11221 * <li>ISO control characters that are not whitespace
11222 * <ul>
11223 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
11224 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
11225 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
11226 * </ul>
11227 *
11228 * <li>all characters that have the {@code FORMAT} general
11229 * category value
11230 * </ul>
11231 *
11232 * @param codePoint the character (Unicode code point) to be tested.
11233 * @return {@code true} if the character is an ignorable control
11234 * character that may be part of a Java or Unicode identifier;
11235 * {@code false} otherwise.
11236 * @see Character#isJavaIdentifierPart(int)
11237 * @see Character#isUnicodeIdentifierPart(int)
11238 * @since 1.5
11239 */
11240 public static boolean isIdentifierIgnorable(int codePoint) {
11241 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
11242 }
11243
11244 /**
11245 * Determines if the specified character (Unicode code point) is an Emoji.
11246 * <p>
11247 * A character is considered to be an Emoji if and only if it has the {@code Emoji}
11248 * property, defined in
11249 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11250 * Unicode Emoji (Technical Standard #51)</a>.
11251 *
11252 * @param codePoint the character (Unicode code point) to be tested.
11253 * @return {@code true} if the character is an Emoji;
11254 * {@code false} otherwise.
11255 * @spec https://unicode.org/reports/tr51/ Unicode Emoji
11256 * @since 21
11257 */
11258 public static boolean isEmoji(int codePoint) {
11259 return CharacterData.of(codePoint).isEmoji(codePoint);
11260 }
11261
11262 /**
11263 * Determines if the specified character (Unicode code point) has the
11264 * Emoji Presentation property by default.
11265 * <p>
11266 * A character is considered to have the Emoji Presentation property if and
11267 * only if it has the {@code Emoji_Presentation} property, defined in
11268 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11269 * Unicode Emoji (Technical Standard #51)</a>.
11270 *
11271 * @param codePoint the character (Unicode code point) to be tested.
11272 * @return {@code true} if the character has the Emoji Presentation
11273 * property; {@code false} otherwise.
11274 * @spec https://unicode.org/reports/tr51/ Unicode Emoji
11275 * @since 21
11276 */
11277 public static boolean isEmojiPresentation(int codePoint) {
11278 return CharacterData.of(codePoint).isEmojiPresentation(codePoint);
11279 }
11280
11281 /**
11282 * Determines if the specified character (Unicode code point) is an
11283 * Emoji Modifier.
11284 * <p>
11285 * A character is considered to be an Emoji Modifier if and only if it has
11286 * the {@code Emoji_Modifier} property, defined in
11287 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11288 * Unicode Emoji (Technical Standard #51)</a>.
11289 *
11290 * @param codePoint the character (Unicode code point) to be tested.
11291 * @return {@code true} if the character is an Emoji Modifier;
11292 * {@code false} otherwise.
11293 * @spec https://unicode.org/reports/tr51/ Unicode Emoji
11294 * @since 21
11295 */
11296 public static boolean isEmojiModifier(int codePoint) {
11297 return CharacterData.of(codePoint).isEmojiModifier(codePoint);
11298 }
11299
11300 /**
11301 * Determines if the specified character (Unicode code point) is an
11302 * Emoji Modifier Base.
11303 * <p>
11304 * A character is considered to be an Emoji Modifier Base if and only if it has
11305 * the {@code Emoji_Modifier_Base} property, defined in
11306 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11307 * Unicode Emoji (Technical Standard #51)</a>.
11308 *
11309 * @param codePoint the character (Unicode code point) to be tested.
11310 * @return {@code true} if the character is an Emoji Modifier Base;
11311 * {@code false} otherwise.
11312 * @spec https://unicode.org/reports/tr51/ Unicode Emoji
11313 * @since 21
11314 */
11315 public static boolean isEmojiModifierBase(int codePoint) {
11316 return CharacterData.of(codePoint).isEmojiModifierBase(codePoint);
11317 }
11318
11319 /**
11320 * Determines if the specified character (Unicode code point) is an
11321 * Emoji Component.
11322 * <p>
11323 * A character is considered to be an Emoji Component if and only if it has
11324 * the {@code Emoji_Component} property, defined in
11325 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11326 * Unicode Emoji (Technical Standard #51)</a>.
11327 *
11328 * @param codePoint the character (Unicode code point) to be tested.
11329 * @return {@code true} if the character is an Emoji Component;
11330 * {@code false} otherwise.
11331 * @spec https://unicode.org/reports/tr51/ Unicode Emoji
11332 * @since 21
11333 */
11334 public static boolean isEmojiComponent(int codePoint) {
11335 return CharacterData.of(codePoint).isEmojiComponent(codePoint);
11336 }
11337
11338 /**
11339 * Determines if the specified character (Unicode code point) is
11340 * an Extended Pictographic.
11341 * <p>
11342 * A character is considered to be an Extended Pictographic if and only if it has
11343 * the {@code Extended_Pictographic} property, defined in
11344 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11345 * Unicode Emoji (Technical Standard #51)</a>.
11346 *
11347 * @param codePoint the character (Unicode code point) to be tested.
11348 * @return {@code true} if the character is an Extended Pictographic;
11349 * {@code false} otherwise.
11350 * @spec https://unicode.org/reports/tr51/ Unicode Emoji
11351 * @since 21
11352 */
11353 public static boolean isExtendedPictographic(int codePoint) {
11354 return CharacterData.of(codePoint).isExtendedPictographic(codePoint);
11355 }
11356
11357 /**
11358 * Converts the character argument to lowercase using case
11359 * mapping information from the UnicodeData file.
11360 * <p>
11361 * Note that
11362 * {@code Character.isLowerCase(Character.toLowerCase(ch))}
11363 * does not always return {@code true} for some ranges of
11364 * characters, particularly those that are symbols or ideographs.
11365 *
11366 * <p>In general, {@link String#toLowerCase()} should be used to map
11367 * characters to lowercase. {@code String} case mapping methods
11368 * have several benefits over {@code Character} case mapping methods.
11369 * {@code String} case mapping methods can perform locale-sensitive
11370 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11371 * the {@code Character} case mapping methods cannot.
11372 *
11373 * <p><b>Note:</b> This method cannot handle <a
11374 * href="#supplementary"> supplementary characters</a>. To support
11375 * all Unicode characters, including supplementary characters, use
11376 * the {@link #toLowerCase(int)} method.
11377 *
11378 * @param ch the character to be converted.
11379 * @return the lowercase equivalent of the character, if any;
11380 * otherwise, the character itself.
11381 * @see Character#isLowerCase(char)
11382 * @see String#toLowerCase()
11383 */
11384 public static char toLowerCase(char ch) {
11385 return (char)toLowerCase((int)ch);
11386 }
11387
11388 /**
11389 * Converts the character (Unicode code point) argument to
11390 * lowercase using case mapping information from the UnicodeData
11391 * file.
11392 *
11393 * <p> Note that
11394 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
11395 * does not always return {@code true} for some ranges of
11396 * characters, particularly those that are symbols or ideographs.
11397 *
11398 * <p>In general, {@link String#toLowerCase()} should be used to map
11399 * characters to lowercase. {@code String} case mapping methods
11400 * have several benefits over {@code Character} case mapping methods.
11401 * {@code String} case mapping methods can perform locale-sensitive
11402 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11403 * the {@code Character} case mapping methods cannot.
11404 *
11405 * @param codePoint the character (Unicode code point) to be converted.
11406 * @return the lowercase equivalent of the character (Unicode code
11407 * point), if any; otherwise, the character itself.
11408 * @see Character#isLowerCase(int)
11409 * @see String#toLowerCase()
11410 *
11411 * @since 1.5
11412 */
11413 public static int toLowerCase(int codePoint) {
11414 return CharacterData.of(codePoint).toLowerCase(codePoint);
11415 }
11416
11417 /**
11418 * Converts the character argument to uppercase using case mapping
11419 * information from the UnicodeData file.
11420 * <p>
11421 * Note that
11422 * {@code Character.isUpperCase(Character.toUpperCase(ch))}
11423 * does not always return {@code true} for some ranges of
11424 * characters, particularly those that are symbols or ideographs.
11425 *
11426 * <p>In general, {@link String#toUpperCase()} should be used to map
11427 * characters to uppercase. {@code String} case mapping methods
11428 * have several benefits over {@code Character} case mapping methods.
11429 * {@code String} case mapping methods can perform locale-sensitive
11430 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11431 * the {@code Character} case mapping methods cannot.
11432 *
11433 * <p><b>Note:</b> This method cannot handle <a
11434 * href="#supplementary"> supplementary characters</a>. To support
11435 * all Unicode characters, including supplementary characters, use
11436 * the {@link #toUpperCase(int)} method.
11437 *
11438 * @param ch the character to be converted.
11439 * @return the uppercase equivalent of the character, if any;
11440 * otherwise, the character itself.
11441 * @see Character#isUpperCase(char)
11442 * @see String#toUpperCase()
11443 */
11444 public static char toUpperCase(char ch) {
11445 return (char)toUpperCase((int)ch);
11446 }
11447
11448 /**
11449 * Converts the character (Unicode code point) argument to
11450 * uppercase using case mapping information from the UnicodeData
11451 * file.
11452 *
11453 * <p>Note that
11454 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
11455 * does not always return {@code true} for some ranges of
11456 * characters, particularly those that are symbols or ideographs.
11457 *
11458 * <p>In general, {@link String#toUpperCase()} should be used to map
11459 * characters to uppercase. {@code String} case mapping methods
11460 * have several benefits over {@code Character} case mapping methods.
11461 * {@code String} case mapping methods can perform locale-sensitive
11462 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11463 * the {@code Character} case mapping methods cannot.
11464 *
11465 * @param codePoint the character (Unicode code point) to be converted.
11466 * @return the uppercase equivalent of the character, if any;
11467 * otherwise, the character itself.
11468 * @see Character#isUpperCase(int)
11469 * @see String#toUpperCase()
11470 *
11471 * @since 1.5
11472 */
11473 public static int toUpperCase(int codePoint) {
11474 return CharacterData.of(codePoint).toUpperCase(codePoint);
11475 }
11476
11477 /**
11478 * Converts the character argument to titlecase using case mapping
11479 * information from the UnicodeData file. If a character has no
11480 * explicit titlecase mapping and is not itself a titlecase char
11481 * according to UnicodeData, then the uppercase mapping is
11482 * returned as an equivalent titlecase mapping. If the
11483 * {@code char} argument is already a titlecase
11484 * {@code char}, the same {@code char} value will be
11485 * returned.
11486 * <p>
11487 * Note that
11488 * {@code Character.isTitleCase(Character.toTitleCase(ch))}
11489 * does not always return {@code true} for some ranges of
11490 * characters.
11491 *
11492 * <p><b>Note:</b> This method cannot handle <a
11493 * href="#supplementary"> supplementary characters</a>. To support
11494 * all Unicode characters, including supplementary characters, use
11495 * the {@link #toTitleCase(int)} method.
11496 *
11497 * @param ch the character to be converted.
11498 * @return the titlecase equivalent of the character, if any;
11499 * otherwise, the character itself.
11500 * @see Character#isTitleCase(char)
11501 * @see Character#toLowerCase(char)
11502 * @see Character#toUpperCase(char)
11503 * @since 1.0.2
11504 */
11505 public static char toTitleCase(char ch) {
11506 return (char)toTitleCase((int)ch);
11507 }
11508
11509 /**
11510 * Converts the character (Unicode code point) argument to titlecase using case mapping
11511 * information from the UnicodeData file. If a character has no
11512 * explicit titlecase mapping and is not itself a titlecase char
11513 * according to UnicodeData, then the uppercase mapping is
11514 * returned as an equivalent titlecase mapping. If the
11515 * character argument is already a titlecase
11516 * character, the same character value will be
11517 * returned.
11518 *
11519 * <p>Note that
11520 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
11521 * does not always return {@code true} for some ranges of
11522 * characters.
11523 *
11524 * @param codePoint the character (Unicode code point) to be converted.
11525 * @return the titlecase equivalent of the character, if any;
11526 * otherwise, the character itself.
11527 * @see Character#isTitleCase(int)
11528 * @see Character#toLowerCase(int)
11529 * @see Character#toUpperCase(int)
11530 * @since 1.5
11531 */
11532 public static int toTitleCase(int codePoint) {
11533 return CharacterData.of(codePoint).toTitleCase(codePoint);
11534 }
11535
11536 /**
11537 * Returns the numeric value of the character {@code ch} in the
11538 * specified radix.
11539 * <p>
11540 * If the radix is not in the range {@code MIN_RADIX} ≤
11541 * {@code radix} ≤ {@code MAX_RADIX} or if the
11542 * value of {@code ch} is not a valid digit in the specified
11543 * radix, {@code -1} is returned. A character is a valid digit
11544 * if at least one of the following is true:
11545 * <ul>
11546 * <li>The method {@code isDigit} is {@code true} of the character
11547 * and the Unicode decimal digit value of the character (or its
11548 * single-character decomposition) is less than the specified radix.
11549 * In this case the decimal digit value is returned.
11550 * <li>The character is one of the uppercase Latin letters
11551 * {@code 'A'} through {@code 'Z'} and its code is less than
11552 * {@code radix + 'A' - 10}.
11553 * In this case, {@code ch - 'A' + 10}
11554 * is returned.
11555 * <li>The character is one of the lowercase Latin letters
11556 * {@code 'a'} through {@code 'z'} and its code is less than
11557 * {@code radix + 'a' - 10}.
11558 * In this case, {@code ch - 'a' + 10}
11559 * is returned.
11560 * <li>The character is one of the fullwidth uppercase Latin letters A
11561 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
11562 * and its code is less than
11563 * {@code radix + '\u005CuFF21' - 10}.
11564 * In this case, {@code ch - '\u005CuFF21' + 10}
11565 * is returned.
11566 * <li>The character is one of the fullwidth lowercase Latin letters a
11567 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
11568 * and its code is less than
11569 * {@code radix + '\u005CuFF41' - 10}.
11570 * In this case, {@code ch - '\u005CuFF41' + 10}
11571 * is returned.
11572 * </ul>
11573 *
11574 * <p><b>Note:</b> This method cannot handle <a
11575 * href="#supplementary"> supplementary characters</a>. To support
11576 * all Unicode characters, including supplementary characters, use
11577 * the {@link #digit(int, int)} method.
11578 *
11579 * @param ch the character to be converted.
11580 * @param radix the radix.
11581 * @return the numeric value represented by the character in the
11582 * specified radix.
11583 * @see Character#forDigit(int, int)
11584 * @see Character#isDigit(char)
11585 */
11586 public static int digit(char ch, int radix) {
11587 return digit((int)ch, radix);
11588 }
11589
11590 /**
11591 * Returns the numeric value of the specified character (Unicode
11592 * code point) in the specified radix.
11593 *
11594 * <p>If the radix is not in the range {@code MIN_RADIX} ≤
11595 * {@code radix} ≤ {@code MAX_RADIX} or if the
11596 * character is not a valid digit in the specified
11597 * radix, {@code -1} is returned. A character is a valid digit
11598 * if at least one of the following is true:
11599 * <ul>
11600 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
11601 * and the Unicode decimal digit value of the character (or its
11602 * single-character decomposition) is less than the specified radix.
11603 * In this case the decimal digit value is returned.
11604 * <li>The character is one of the uppercase Latin letters
11605 * {@code 'A'} through {@code 'Z'} and its code is less than
11606 * {@code radix + 'A' - 10}.
11607 * In this case, {@code codePoint - 'A' + 10}
11608 * is returned.
11609 * <li>The character is one of the lowercase Latin letters
11610 * {@code 'a'} through {@code 'z'} and its code is less than
11611 * {@code radix + 'a' - 10}.
11612 * In this case, {@code codePoint - 'a' + 10}
11613 * is returned.
11614 * <li>The character is one of the fullwidth uppercase Latin letters A
11615 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
11616 * and its code is less than
11617 * {@code radix + '\u005CuFF21' - 10}.
11618 * In this case,
11619 * {@code codePoint - '\u005CuFF21' + 10}
11620 * is returned.
11621 * <li>The character is one of the fullwidth lowercase Latin letters a
11622 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
11623 * and its code is less than
11624 * {@code radix + '\u005CuFF41'- 10}.
11625 * In this case,
11626 * {@code codePoint - '\u005CuFF41' + 10}
11627 * is returned.
11628 * </ul>
11629 *
11630 * @param codePoint the character (Unicode code point) to be converted.
11631 * @param radix the radix.
11632 * @return the numeric value represented by the character in the
11633 * specified radix.
11634 * @see Character#forDigit(int, int)
11635 * @see Character#isDigit(int)
11636 * @since 1.5
11637 */
11638 public static int digit(int codePoint, int radix) {
11639 return CharacterData.of(codePoint).digit(codePoint, radix);
11640 }
11641
11642 /**
11643 * Returns the {@code int} value that the specified Unicode
11644 * character represents. For example, the character
11645 * {@code '\u005Cu216C'} (the roman numeral fifty) will return
11646 * an int with a value of 50.
11647 * <p>
11648 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
11649 * {@code '\u005Cu005A'}), lowercase
11650 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
11651 * full width variant ({@code '\u005CuFF21'} through
11652 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
11653 * {@code '\u005CuFF5A'}) forms have numeric values from 10
11654 * through 35. This is independent of the Unicode specification,
11655 * which does not assign numeric values to these {@code char}
11656 * values.
11657 * <p>
11658 * If the character does not have a numeric value, then -1 is returned.
11659 * If the character has a numeric value that cannot be represented as a
11660 * nonnegative integer (for example, a fractional value), then -2
11661 * is returned.
11662 *
11663 * <p><b>Note:</b> This method cannot handle <a
11664 * href="#supplementary"> supplementary characters</a>. To support
11665 * all Unicode characters, including supplementary characters, use
11666 * the {@link #getNumericValue(int)} method.
11667 *
11668 * @param ch the character to be converted.
11669 * @return the numeric value of the character, as a nonnegative {@code int}
11670 * value; -2 if the character has a numeric value but the value
11671 * can not be represented as a nonnegative {@code int} value;
11672 * -1 if the character has no numeric value.
11673 * @see Character#forDigit(int, int)
11674 * @see Character#isDigit(char)
11675 * @since 1.1
11676 */
11677 public static int getNumericValue(char ch) {
11678 return getNumericValue((int)ch);
11679 }
11680
11681 /**
11682 * Returns the {@code int} value that the specified
11683 * character (Unicode code point) represents. For example, the character
11684 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
11685 * an {@code int} with a value of 50.
11686 * <p>
11687 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
11688 * {@code '\u005Cu005A'}), lowercase
11689 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
11690 * full width variant ({@code '\u005CuFF21'} through
11691 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
11692 * {@code '\u005CuFF5A'}) forms have numeric values from 10
11693 * through 35. This is independent of the Unicode specification,
11694 * which does not assign numeric values to these {@code char}
11695 * values.
11696 * <p>
11697 * If the character does not have a numeric value, then -1 is returned.
11698 * If the character has a numeric value that cannot be represented as a
11699 * nonnegative integer (for example, a fractional value), then -2
11700 * is returned.
11701 *
11702 * @param codePoint the character (Unicode code point) to be converted.
11703 * @return the numeric value of the character, as a nonnegative {@code int}
11704 * value; -2 if the character has a numeric value but the value
11705 * can not be represented as a nonnegative {@code int} value;
11706 * -1 if the character has no numeric value.
11707 * @see Character#forDigit(int, int)
11708 * @see Character#isDigit(int)
11709 * @since 1.5
11710 */
11711 public static int getNumericValue(int codePoint) {
11712 return CharacterData.of(codePoint).getNumericValue(codePoint);
11713 }
11714
11715 /**
11716 * Determines if the specified character is ISO-LATIN-1 white space.
11717 * This method returns {@code true} for the following five
11718 * characters only:
11719 * <table class="striped">
11720 * <caption style="display:none">truechars</caption>
11721 * <thead>
11722 * <tr><th scope="col">Character
11723 * <th scope="col">Code
11724 * <th scope="col">Name
11725 * </thead>
11726 * <tbody>
11727 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td>
11728 * <td>{@code HORIZONTAL TABULATION}</td></tr>
11729 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td>
11730 * <td>{@code NEW LINE}</td></tr>
11731 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td>
11732 * <td>{@code FORM FEED}</td></tr>
11733 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td>
11734 * <td>{@code CARRIAGE RETURN}</td></tr>
11735 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td>
11736 * <td>{@code SPACE}</td></tr>
11737 * </tbody>
11738 * </table>
11739 *
11740 * @param ch the character to be tested.
11741 * @return {@code true} if the character is ISO-LATIN-1 white
11742 * space; {@code false} otherwise.
11743 * @see Character#isSpaceChar(char)
11744 * @see Character#isWhitespace(char)
11745 * @deprecated Replaced by isWhitespace(char).
11746 */
11747 @Deprecated(since="1.1")
11748 public static boolean isSpace(char ch) {
11749 return (ch <= 0x0020) &&
11750 (((((1L << 0x0009) |
11751 (1L << 0x000A) |
11752 (1L << 0x000C) |
11753 (1L << 0x000D) |
11754 (1L << 0x0020)) >> ch) & 1L) != 0);
11755 }
11756
11757
11758 /**
11759 * Determines if the specified character is a Unicode space character.
11760 * A character is considered to be a space character if and only if
11761 * it is specified to be a space character by the Unicode Standard. This
11762 * method returns true if the character's general category type is any of
11763 * the following:
11764 * <ul>
11765 * <li> {@code SPACE_SEPARATOR}
11766 * <li> {@code LINE_SEPARATOR}
11767 * <li> {@code PARAGRAPH_SEPARATOR}
11768 * </ul>
11769 *
11770 * <p><b>Note:</b> This method cannot handle <a
11771 * href="#supplementary"> supplementary characters</a>. To support
11772 * all Unicode characters, including supplementary characters, use
11773 * the {@link #isSpaceChar(int)} method.
11774 *
11775 * @param ch the character to be tested.
11776 * @return {@code true} if the character is a space character;
11777 * {@code false} otherwise.
11778 * @see Character#isWhitespace(char)
11779 * @since 1.1
11780 */
11781 public static boolean isSpaceChar(char ch) {
11782 return isSpaceChar((int)ch);
11783 }
11784
11785 /**
11786 * Determines if the specified character (Unicode code point) is a
11787 * Unicode space character. A character is considered to be a
11788 * space character if and only if it is specified to be a space
11789 * character by the Unicode Standard. This method returns true if
11790 * the character's general category type is any of the following:
11791 *
11792 * <ul>
11793 * <li> {@link #SPACE_SEPARATOR}
11794 * <li> {@link #LINE_SEPARATOR}
11795 * <li> {@link #PARAGRAPH_SEPARATOR}
11796 * </ul>
11797 *
11798 * @param codePoint the character (Unicode code point) to be tested.
11799 * @return {@code true} if the character is a space character;
11800 * {@code false} otherwise.
11801 * @see Character#isWhitespace(int)
11802 * @since 1.5
11803 */
11804 public static boolean isSpaceChar(int codePoint) {
11805 return ((((1 << Character.SPACE_SEPARATOR) |
11806 (1 << Character.LINE_SEPARATOR) |
11807 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
11808 != 0;
11809 }
11810
11811 /**
11812 * Determines if the specified character is white space according to Java.
11813 * A character is a Java whitespace character if and only if it satisfies
11814 * one of the following criteria:
11815 * <ul>
11816 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
11817 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
11818 * but is not also a non-breaking space ({@code '\u005Cu00A0'},
11819 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
11820 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
11821 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
11822 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
11823 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
11824 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
11825 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
11826 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
11827 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
11828 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
11829 * </ul>
11830 *
11831 * <p><b>Note:</b> This method cannot handle <a
11832 * href="#supplementary"> supplementary characters</a>. To support
11833 * all Unicode characters, including supplementary characters, use
11834 * the {@link #isWhitespace(int)} method.
11835 *
11836 * @param ch the character to be tested.
11837 * @return {@code true} if the character is a Java whitespace
11838 * character; {@code false} otherwise.
11839 * @see Character#isSpaceChar(char)
11840 * @since 1.1
11841 */
11842 public static boolean isWhitespace(char ch) {
11843 return isWhitespace((int)ch);
11844 }
11845
11846 /**
11847 * Determines if the specified character (Unicode code point) is
11848 * white space according to Java. A character is a Java
11849 * whitespace character if and only if it satisfies one of the
11850 * following criteria:
11851 * <ul>
11852 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
11853 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
11854 * but is not also a non-breaking space ({@code '\u005Cu00A0'},
11855 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
11856 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
11857 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
11858 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
11859 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
11860 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
11861 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
11862 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
11863 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
11864 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
11865 * </ul>
11866 *
11867 * @param codePoint the character (Unicode code point) to be tested.
11868 * @return {@code true} if the character is a Java whitespace
11869 * character; {@code false} otherwise.
11870 * @see Character#isSpaceChar(int)
11871 * @since 1.5
11872 */
11873 public static boolean isWhitespace(int codePoint) {
11874 return CharacterData.of(codePoint).isWhitespace(codePoint);
11875 }
11876
11877 /**
11878 * Determines if the specified character is an ISO control
11879 * character. A character is considered to be an ISO control
11880 * character if its code is in the range {@code '\u005Cu0000'}
11881 * through {@code '\u005Cu001F'} or in the range
11882 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
11883 *
11884 * <p><b>Note:</b> This method cannot handle <a
11885 * href="#supplementary"> supplementary characters</a>. To support
11886 * all Unicode characters, including supplementary characters, use
11887 * the {@link #isISOControl(int)} method.
11888 *
11889 * @param ch the character to be tested.
11890 * @return {@code true} if the character is an ISO control character;
11891 * {@code false} otherwise.
11892 *
11893 * @see Character#isSpaceChar(char)
11894 * @see Character#isWhitespace(char)
11895 * @since 1.1
11896 */
11897 public static boolean isISOControl(char ch) {
11898 return isISOControl((int)ch);
11899 }
11900
11901 /**
11902 * Determines if the referenced character (Unicode code point) is an ISO control
11903 * character. A character is considered to be an ISO control
11904 * character if its code is in the range {@code '\u005Cu0000'}
11905 * through {@code '\u005Cu001F'} or in the range
11906 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
11907 *
11908 * @param codePoint the character (Unicode code point) to be tested.
11909 * @return {@code true} if the character is an ISO control character;
11910 * {@code false} otherwise.
11911 * @see Character#isSpaceChar(int)
11912 * @see Character#isWhitespace(int)
11913 * @since 1.5
11914 */
11915 public static boolean isISOControl(int codePoint) {
11916 // Optimized form of:
11917 // (codePoint >= 0x00 && codePoint <= 0x1F) ||
11918 // (codePoint >= 0x7F && codePoint <= 0x9F);
11919 return codePoint <= 0x9F &&
11920 (codePoint >= 0x7F || (codePoint >>> 5 == 0));
11921 }
11922
11923 /**
11924 * Returns a value indicating a character's general category.
11925 *
11926 * <p><b>Note:</b> This method cannot handle <a
11927 * href="#supplementary"> supplementary characters</a>. To support
11928 * all Unicode characters, including supplementary characters, use
11929 * the {@link #getType(int)} method.
11930 *
11931 * @param ch the character to be tested.
11932 * @return a value of type {@code int} representing the
11933 * character's general category.
11934 * @see Character#COMBINING_SPACING_MARK
11935 * @see Character#CONNECTOR_PUNCTUATION
11936 * @see Character#CONTROL
11937 * @see Character#CURRENCY_SYMBOL
11938 * @see Character#DASH_PUNCTUATION
11939 * @see Character#DECIMAL_DIGIT_NUMBER
11940 * @see Character#ENCLOSING_MARK
11941 * @see Character#END_PUNCTUATION
11942 * @see Character#FINAL_QUOTE_PUNCTUATION
11943 * @see Character#FORMAT
11944 * @see Character#INITIAL_QUOTE_PUNCTUATION
11945 * @see Character#LETTER_NUMBER
11946 * @see Character#LINE_SEPARATOR
11947 * @see Character#LOWERCASE_LETTER
11948 * @see Character#MATH_SYMBOL
11949 * @see Character#MODIFIER_LETTER
11950 * @see Character#MODIFIER_SYMBOL
11951 * @see Character#NON_SPACING_MARK
11952 * @see Character#OTHER_LETTER
11953 * @see Character#OTHER_NUMBER
11954 * @see Character#OTHER_PUNCTUATION
11955 * @see Character#OTHER_SYMBOL
11956 * @see Character#PARAGRAPH_SEPARATOR
11957 * @see Character#PRIVATE_USE
11958 * @see Character#SPACE_SEPARATOR
11959 * @see Character#START_PUNCTUATION
11960 * @see Character#SURROGATE
11961 * @see Character#TITLECASE_LETTER
11962 * @see Character#UNASSIGNED
11963 * @see Character#UPPERCASE_LETTER
11964 * @since 1.1
11965 */
11966 public static int getType(char ch) {
11967 return getType((int)ch);
11968 }
11969
11970 /**
11971 * Returns a value indicating a character's general category.
11972 *
11973 * @param codePoint the character (Unicode code point) to be tested.
11974 * @return a value of type {@code int} representing the
11975 * character's general category.
11976 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
11977 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
11978 * @see Character#CONTROL CONTROL
11979 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
11980 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION
11981 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
11982 * @see Character#ENCLOSING_MARK ENCLOSING_MARK
11983 * @see Character#END_PUNCTUATION END_PUNCTUATION
11984 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
11985 * @see Character#FORMAT FORMAT
11986 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
11987 * @see Character#LETTER_NUMBER LETTER_NUMBER
11988 * @see Character#LINE_SEPARATOR LINE_SEPARATOR
11989 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER
11990 * @see Character#MATH_SYMBOL MATH_SYMBOL
11991 * @see Character#MODIFIER_LETTER MODIFIER_LETTER
11992 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
11993 * @see Character#NON_SPACING_MARK NON_SPACING_MARK
11994 * @see Character#OTHER_LETTER OTHER_LETTER
11995 * @see Character#OTHER_NUMBER OTHER_NUMBER
11996 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
11997 * @see Character#OTHER_SYMBOL OTHER_SYMBOL
11998 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
11999 * @see Character#PRIVATE_USE PRIVATE_USE
12000 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR
12001 * @see Character#START_PUNCTUATION START_PUNCTUATION
12002 * @see Character#SURROGATE SURROGATE
12003 * @see Character#TITLECASE_LETTER TITLECASE_LETTER
12004 * @see Character#UNASSIGNED UNASSIGNED
12005 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER
12006 * @since 1.5
12007 */
12008 public static int getType(int codePoint) {
12009 return CharacterData.of(codePoint).getType(codePoint);
12010 }
12011
12012 /**
12013 * Determines the character representation for a specific digit in
12014 * the specified radix. If the value of {@code radix} is not a
12015 * valid radix, or the value of {@code digit} is not a valid
12016 * digit in the specified radix, the null character
12017 * ({@code '\u005Cu0000'}) is returned.
12018 * <p>
12019 * The {@code radix} argument is valid if it is greater than or
12020 * equal to {@code MIN_RADIX} and less than or equal to
12021 * {@code MAX_RADIX}. The {@code digit} argument is valid if
12022 * {@code 0 <= digit < radix}.
12023 * <p>
12024 * If the digit is less than 10, then
12025 * {@code '0' + digit} is returned. Otherwise, the value
12026 * {@code 'a' + digit - 10} is returned.
12027 *
12028 * @param digit the number to convert to a character.
12029 * @param radix the radix.
12030 * @return the {@code char} representation of the specified digit
12031 * in the specified radix.
12032 * @see Character#MIN_RADIX
12033 * @see Character#MAX_RADIX
12034 * @see Character#digit(char, int)
12035 */
12036 public static char forDigit(int digit, int radix) {
12037 if ((digit >= radix) || (digit < 0)) {
12038 return '\0';
12039 }
12040 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
12041 return '\0';
12042 }
12043 if (digit < 10) {
12044 return (char)('0' + digit);
12045 }
12046 return (char)('a' - 10 + digit);
12047 }
12048
12049 /**
12050 * Returns the Unicode directionality property for the given
12051 * character. Character directionality is used to calculate the
12052 * visual ordering of text. The directionality value of undefined
12053 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
12054 *
12055 * <p><b>Note:</b> This method cannot handle <a
12056 * href="#supplementary"> supplementary characters</a>. To support
12057 * all Unicode characters, including supplementary characters, use
12058 * the {@link #getDirectionality(int)} method.
12059 *
12060 * @param ch {@code char} for which the directionality property
12061 * is requested.
12062 * @return the directionality property of the {@code char} value.
12063 *
12064 * @see Character#DIRECTIONALITY_UNDEFINED
12065 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
12066 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
12067 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
12068 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
12069 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
12070 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
12071 * @see Character#DIRECTIONALITY_ARABIC_NUMBER
12072 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
12073 * @see Character#DIRECTIONALITY_NONSPACING_MARK
12074 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
12075 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
12076 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
12077 * @see Character#DIRECTIONALITY_WHITESPACE
12078 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
12079 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
12080 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
12081 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
12082 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
12083 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
12084 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
12085 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
12086 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE
12087 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
12088 * @since 1.4
12089 */
12090 public static byte getDirectionality(char ch) {
12091 return getDirectionality((int)ch);
12092 }
12093
12094 /**
12095 * Returns the Unicode directionality property for the given
12096 * character (Unicode code point). Character directionality is
12097 * used to calculate the visual ordering of text. The
12098 * directionality value of undefined character is {@link
12099 * #DIRECTIONALITY_UNDEFINED}.
12100 *
12101 * @param codePoint the character (Unicode code point) for which
12102 * the directionality property is requested.
12103 * @return the directionality property of the character.
12104 *
12105 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
12106 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
12107 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
12108 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
12109 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
12110 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
12111 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
12112 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
12113 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
12114 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
12115 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
12116 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
12117 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
12118 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
12119 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
12120 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
12121 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
12122 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
12123 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
12124 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
12125 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
12126 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
12127 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE
12128 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
12129 * @since 1.5
12130 */
12131 public static byte getDirectionality(int codePoint) {
12132 return CharacterData.of(codePoint).getDirectionality(codePoint);
12133 }
12134
12135 /**
12136 * Determines whether the character is mirrored according to the
12137 * Unicode specification. Mirrored characters should have their
12138 * glyphs horizontally mirrored when displayed in text that is
12139 * right-to-left. For example, {@code '\u005Cu0028'} LEFT
12140 * PARENTHESIS is semantically defined to be an <i>opening
12141 * parenthesis</i>. This will appear as a "(" in text that is
12142 * left-to-right but as a ")" in text that is right-to-left.
12143 *
12144 * <p><b>Note:</b> This method cannot handle <a
12145 * href="#supplementary"> supplementary characters</a>. To support
12146 * all Unicode characters, including supplementary characters, use
12147 * the {@link #isMirrored(int)} method.
12148 *
12149 * @param ch {@code char} for which the mirrored property is requested
12150 * @return {@code true} if the char is mirrored, {@code false}
12151 * if the {@code char} is not mirrored or is not defined.
12152 * @since 1.4
12153 */
12154 public static boolean isMirrored(char ch) {
12155 return isMirrored((int)ch);
12156 }
12157
12158 /**
12159 * Determines whether the specified character (Unicode code point)
12160 * is mirrored according to the Unicode specification. Mirrored
12161 * characters should have their glyphs horizontally mirrored when
12162 * displayed in text that is right-to-left. For example,
12163 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
12164 * defined to be an <i>opening parenthesis</i>. This will appear
12165 * as a "(" in text that is left-to-right but as a ")" in text
12166 * that is right-to-left.
12167 *
12168 * @param codePoint the character (Unicode code point) to be tested.
12169 * @return {@code true} if the character is mirrored, {@code false}
12170 * if the character is not mirrored or is not defined.
12171 * @since 1.5
12172 */
12173 public static boolean isMirrored(int codePoint) {
12174 return CharacterData.of(codePoint).isMirrored(codePoint);
12175 }
12176
12177 /**
12178 * Compares two {@code Character} objects numerically.
12179 *
12180 * @param anotherCharacter the {@code Character} to be compared.
12181 * @return the value {@code 0} if the argument {@code Character}
12182 * is equal to this {@code Character}; a value less than
12183 * {@code 0} if this {@code Character} is numerically less
12184 * than the {@code Character} argument; and a value greater than
12185 * {@code 0} if this {@code Character} is numerically greater
12186 * than the {@code Character} argument (unsigned comparison).
12187 * Note that this is strictly a numerical comparison; it is not
12188 * locale-dependent.
12189 * @since 1.2
12190 */
12191 public int compareTo(Character anotherCharacter) {
12192 return compare(this.value, anotherCharacter.value);
12193 }
12194
12195 /**
12196 * Compares two {@code char} values numerically.
12197 * The value returned is identical to what would be returned by:
12198 * <pre>
12199 * Character.valueOf(x).compareTo(Character.valueOf(y))
12200 * </pre>
12201 *
12202 * @param x the first {@code char} to compare
12203 * @param y the second {@code char} to compare
12204 * @return the value {@code 0} if {@code x == y};
12205 * a value less than {@code 0} if {@code x < y}; and
12206 * a value greater than {@code 0} if {@code x > y}
12207 * @since 1.7
12208 */
12209 public static int compare(char x, char y) {
12210 return x - y;
12211 }
12212
12213 /**
12214 * Converts the character (Unicode code point) argument to uppercase using
12215 * information from the UnicodeData file.
12216 *
12217 * @param codePoint the character (Unicode code point) to be converted.
12218 * @return either the uppercase equivalent of the character, if
12219 * any, or an error flag ({@code Character.ERROR})
12220 * that indicates that a 1:M {@code char} mapping exists.
12221 * @see Character#isLowerCase(char)
12222 * @see Character#isUpperCase(char)
12223 * @see Character#toLowerCase(char)
12224 * @see Character#toTitleCase(char)
12225 * @since 1.4
12226 */
12227 static int toUpperCaseEx(int codePoint) {
12228 assert isValidCodePoint(codePoint);
12229 return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
12230 }
12231
12232 /**
12233 * Converts the character (Unicode code point) argument to uppercase using case
12234 * mapping information from the SpecialCasing file in the Unicode
12235 * specification. If a character has no explicit uppercase
12236 * mapping, then the {@code char} itself is returned in the
12237 * {@code char[]}.
12238 *
12239 * @param codePoint the character (Unicode code point) to be converted.
12240 * @return a {@code char[]} with the uppercased character.
12241 * @since 1.4
12242 */
12243 static char[] toUpperCaseCharArray(int codePoint) {
12244 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
12245 assert isBmpCodePoint(codePoint);
12246 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
12247 }
12248
/**
 * The number of bits used to represent a {@code char} value in unsigned
 * binary form, constant {@code 16}.
 *
 * @see #BYTES
 * @since 1.5
 */
public static final int SIZE = 16;
12256
/**
 * The number of bytes used to represent a {@code char} value in unsigned
 * binary form.
 *
 * @see #SIZE
 * @since 1.8
 */
public static final int BYTES = SIZE / Byte.SIZE; // derived from SIZE rather than hard-coded
12264
12265 /**
12266 * Returns the value obtained by reversing the order of the bytes in the
12267 * specified {@code char} value.
12268 *
12269 * @param ch The {@code char} of which to reverse the byte order.
12270 * @return the value obtained by reversing (or, equivalently, swapping)
12271 * the bytes in the specified {@code char} value.
12272 * @since 1.5
12273 */
12274 @IntrinsicCandidate
12275 public static char reverseBytes(char ch) {
12276 return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
12277 }
12278
12279 /**
12280 * Returns the name of the specified character
12281 * {@code codePoint}, or null if the code point is
12282 * {@link #UNASSIGNED unassigned}.
12283 * <p>
12284 * If the specified character is not assigned a name by
12285 * the <i>UnicodeData</i> file (part of the Unicode Character
12286 * Database maintained by the Unicode Consortium), the returned
12287 * name is the same as the result of the expression:
12288 *
12289 * <blockquote>{@code
12290 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
12291 * + " "
12292 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12293 *
12294 * }</blockquote>
12295 *
12296 * For the {@code codePoint}s in the <i>UnicodeData</i> file, the name
12297 * returned by this method follows the naming scheme in the
12298 * "Unicode Name Property" section of the Unicode Standard. For other
12299 * code points, such as Hangul/Ideographs, The name generation rule above
12300 * differs from the one defined in the Unicode Standard.
12301 *
12302 * @param codePoint the character (Unicode code point)
12303 *
12304 * @return the name of the specified character, or null if
12305 * the code point is unassigned.
12306 *
12307 * @throws IllegalArgumentException if the specified
12308 * {@code codePoint} is not a valid Unicode
12309 * code point.
12310 *
12311 * @since 1.7
12312 */
12313 public static String getName(int codePoint) {
12314 if (!isValidCodePoint(codePoint)) {
12315 throw new IllegalArgumentException(
12316 String.format("Not a valid Unicode code point: 0x%X", codePoint));
12317 }
12318 String name = CharacterName.getInstance().getName(codePoint);
12319 if (name != null)
12320 return name;
12321 if (getType(codePoint) == UNASSIGNED)
12322 return null;
12323 UnicodeBlock block = UnicodeBlock.of(codePoint);
12324 if (block != null)
12325 return block.toString().replace('_', ' ') + " "
12326 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12327 // should never come here
12328 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12329 }
12330
12331 /**
12332 * Returns the code point value of the Unicode character specified by
12333 * the given character name.
12334 * <p>
12335 * If a character is not assigned a name by the <i>UnicodeData</i>
12336 * file (part of the Unicode Character Database maintained by the Unicode
12337 * Consortium), its name is defined as the result of the expression:
12338 *
12339 * <blockquote>{@code
12340 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
12341 * + " "
12342 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12343 *
12344 * }</blockquote>
12345 * <p>
12346 * The {@code name} matching is case insensitive, with any leading and
12347 * trailing whitespace character removed.
12348 *
12349 * For the code points in the <i>UnicodeData</i> file, this method
12350 * recognizes the name which conforms to the name defined in the
12351 * "Unicode Name Property" section in the Unicode Standard. For other
12352 * code points, this method recognizes the name generated with
12353 * {@link #getName(int)} method.
12354 *
12355 * @param name the character name
12356 *
12357 * @return the code point value of the character specified by its name.
12358 *
12359 * @throws IllegalArgumentException if the specified {@code name}
12360 * is not a valid character name.
12361 * @throws NullPointerException if {@code name} is {@code null}
12362 *
12363 * @since 9
12364 */
12365 public static int codePointOf(String name) {
12366 name = name.trim().toUpperCase(Locale.ROOT);
12367 int cp = CharacterName.getInstance().getCodePoint(name);
12368 if (cp != -1)
12369 return cp;
12370 try {
12371 int off = name.lastIndexOf(' ');
12372 if (off != -1) {
12373 cp = Integer.parseInt(name, off + 1, name.length(), 16);
12374 if (isValidCodePoint(cp) && name.equals(getName(cp)))
12375 return cp;
12376 }
12377 } catch (Exception x) {}
12378 throw new IllegalArgumentException("Unrecognized character name :" + name);
12379 }
12380 }