1 /*
2 * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package java.lang;
27
28 import jdk.internal.misc.CDS;
29 import jdk.internal.misc.PreviewFeatures;
30 import jdk.internal.value.DeserializeConstructor;
31 import jdk.internal.vm.annotation.IntrinsicCandidate;
32 import jdk.internal.vm.annotation.Stable;
33
34 import java.lang.constant.Constable;
35 import java.lang.constant.DynamicConstantDesc;
36 import java.util.Arrays;
37 import java.util.HashMap;
38 import java.util.Locale;
39 import java.util.Map;
40 import java.util.Objects;
41 import java.util.Optional;
42
43 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST;
44 import static java.lang.constant.ConstantDescs.CD_char;
45 import static java.lang.constant.ConstantDescs.DEFAULT_NAME;
46
47 /**
48 * The {@code Character} class is the {@linkplain
49 * java.lang##wrapperClass wrapper class} for values of the primitive
50 * type {@code char}. An object of type {@code Character} contains a
51 * single field whose type is {@code char}.
52 *
53 * <p>In addition, this class provides a large number of static methods for
54 * determining a character's category (lowercase letter, digit, etc.)
55 * and for converting characters from uppercase to lowercase and vice
56 * versa.
57 *
58 * <h2><a id="conformance">Unicode Conformance</a></h2>
59 * <p>
60 * The fields and methods of class {@code Character} are defined in terms
61 * of character information from the Unicode Standard, specifically the
62 * <i>UnicodeData</i> file that is part of the Unicode Character Database.
63 * This file specifies properties including name and category for every
64 * assigned Unicode code point or character range. The file is available
65 * from the Unicode Consortium at
66 * <a href="http://www.unicode.org">http://www.unicode.org</a>.
67 * <p>
68 * Character information is based on the Unicode Standard, version 16.0.
69 * <p>
70 * The Java platform has supported different versions of the Unicode
71 * Standard over time. Upgrades to newer versions of the Unicode Standard
72 * occurred in the following Java releases, each indicating the new version:
73 * <table class="striped">
74 * <caption style="display:none">Shows Java releases and supported Unicode versions</caption>
75 * <thead>
76 * <tr><th scope="col">Java release</th>
77 * <th scope="col">Unicode version</th></tr>
78 * </thead>
79 * <tbody>
80 * <tr><th scope="row" style="text-align:left">Java SE 24</th>
81 * <td>Unicode 16.0</td></tr>
82 * <tr><th scope="row" style="text-align:left">Java SE 22</th>
83 * <td>Unicode 15.1</td></tr>
84 * <tr><th scope="row" style="text-align:left">Java SE 20</th>
85 * <td>Unicode 15.0</td></tr>
86 * <tr><th scope="row" style="text-align:left">Java SE 19</th>
87 * <td>Unicode 14.0</td></tr>
88 * <tr><th scope="row" style="text-align:left">Java SE 15</th>
89 * <td>Unicode 13.0</td></tr>
90 * <tr><th scope="row" style="text-align:left">Java SE 13</th>
91 * <td>Unicode 12.1</td></tr>
92 * <tr><th scope="row" style="text-align:left">Java SE 12</th>
93 * <td>Unicode 11.0</td></tr>
94 * <tr><th scope="row" style="text-align:left">Java SE 11</th>
95 * <td>Unicode 10.0</td></tr>
96 * <tr><th scope="row" style="text-align:left">Java SE 9</th>
97 * <td>Unicode 8.0</td></tr>
98 * <tr><th scope="row" style="text-align:left">Java SE 8</th>
99 * <td>Unicode 6.2</td></tr>
100 * <tr><th scope="row" style="text-align:left">Java SE 7</th>
101 * <td>Unicode 6.0</td></tr>
102 * <tr><th scope="row" style="text-align:left">Java SE 5.0</th>
103 * <td>Unicode 4.0</td></tr>
104 * <tr><th scope="row" style="text-align:left">Java SE 1.4</th>
105 * <td>Unicode 3.0</td></tr>
106 * <tr><th scope="row" style="text-align:left">JDK 1.1</th>
107 * <td>Unicode 2.0</td></tr>
108 * <tr><th scope="row" style="text-align:left">JDK 1.0.2</th>
109 * <td>Unicode 1.1.5</td></tr>
110 * </tbody>
111 * </table>
112 * Variations from these base Unicode versions, such as recognized appendixes,
113 * are documented elsewhere.
114 * <h2><a id="unicode">Unicode Character Representations</a></h2>
115 *
116 * <p>The {@code char} data type (and therefore the value that a
 * {@code Character} object encapsulates) is based on the
118 * original Unicode specification, which defined characters as
119 * fixed-width 16-bit entities. The Unicode Standard has since been
120 * changed to allow for characters whose representation requires more
121 * than 16 bits. The range of legal <em>code point</em>s is now
122 * U+0000 to U+10FFFF, known as
123 * <em><a href="https://www.unicode.org/glossary/#unicode_scalar_value">
124 * Unicode scalar value</a></em>.
125 *
126 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is
127 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
128 * <a id="supplementary">Characters</a> whose code points are greater
129 * than U+FFFF are called <em>supplementary character</em>s. The Java
130 * platform uses the UTF-16 representation in {@code char} arrays and
131 * in the {@code String} and {@code StringBuffer} classes. In
132 * this representation, supplementary characters are represented as a pair
133 * of {@code char} values, the first from the <em>high-surrogates</em>
134 * range, (\uD800-\uDBFF), the second from the
135 * <em>low-surrogates</em> range (\uDC00-\uDFFF).
136 *
137 * <p>A {@code char} value, therefore, represents Basic
138 * Multilingual Plane (BMP) code points, including the surrogate
139 * code points, or code units of the UTF-16 encoding. An
140 * {@code int} value represents all Unicode code points,
141 * including supplementary code points. The lower (least significant)
142 * 21 bits of {@code int} are used to represent Unicode code
143 * points and the upper (most significant) 11 bits must be zero.
144 * Unless otherwise specified, the behavior with respect to
145 * supplementary characters and surrogate {@code char} values is
146 * as follows:
147 *
148 * <ul>
149 * <li>The methods that only accept a {@code char} value cannot support
150 * supplementary characters. They treat {@code char} values from the
151 * surrogate ranges as undefined characters. For example,
152 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
153 * this specific value if followed by any low-surrogate value in a string
154 * would represent a letter.
155 *
156 * <li>The methods that accept an {@code int} value support all
157 * Unicode characters, including supplementary characters. For
158 * example, {@code Character.isLetter(0x2F81A)} returns
159 * {@code true} because the code point value represents a letter
160 * (a CJK ideograph).
161 * </ul>
162 *
163 * <p>In the Java SE API documentation, <em>Unicode code point</em> is
164 * used for character values in the range between U+0000 and U+10FFFF,
165 * and <em>Unicode code unit</em> is used for 16-bit
166 * {@code char} values that are code units of the <em>UTF-16</em>
167 * encoding. For more information on Unicode terminology, refer to the
168 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
169 *
170 * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a>
171 * class; programmers should treat instances that are {@linkplain #equals(Object) equal}
172 * as interchangeable and should not use instances for synchronization, mutexes, or
173 * with {@linkplain java.lang.ref.Reference object references}.
174 *
175 * <div class="preview-block">
176 * <div class="preview-comment">
177 * When preview features are enabled, {@code Character} is a {@linkplain Class#isValue value class}.
 * Use of value class instances for synchronization, mutexes, or with
 * {@linkplain java.lang.ref.Reference object references} results in an
 * {@link IdentityException}.
181 * </div>
182 * </div>
183 *
184 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
185 * @author Lee Boynton
186 * @author Guy Steele
187 * @author Akira Tanaka
188 * @author Martin Buchholz
189 * @author Ulf Zibis
190 * @since 1.0
191 */
192 @jdk.internal.MigratedValueClass
193 @jdk.internal.ValueBased
194 public final class Character implements java.io.Serializable, Comparable<Character>, Constable {
195 /**
196 * The minimum radix available for conversion to and from strings.
197 * The constant value of this field is the smallest value permitted
198 * for the radix argument in radix-conversion methods such as the
199 * {@code digit} method, the {@code forDigit} method, and the
200 * {@code toString} method of class {@code Integer}.
201 *
202 * @see Character#digit(char, int)
203 * @see Character#forDigit(int, int)
204 * @see Integer#toString(int, int)
205 * @see Integer#valueOf(String)
206 */
207 public static final int MIN_RADIX = 2;
208
209 /**
210 * The maximum radix available for conversion to and from strings.
211 * The constant value of this field is the largest value permitted
212 * for the radix argument in radix-conversion methods such as the
213 * {@code digit} method, the {@code forDigit} method, and the
214 * {@code toString} method of class {@code Integer}.
215 *
216 * @see Character#digit(char, int)
217 * @see Character#forDigit(int, int)
218 * @see Integer#toString(int, int)
219 * @see Integer#valueOf(String)
220 */
221 public static final int MAX_RADIX = 36;
222
223 /**
224 * The constant value of this field is the smallest value of type
225 * {@code char}, {@code '\u005Cu0000'}.
226 *
227 * @since 1.0.2
228 */
229 public static final char MIN_VALUE = '\u0000';
230
231 /**
232 * The constant value of this field is the largest value of type
233 * {@code char}, {@code '\u005CuFFFF'}.
234 *
235 * @since 1.0.2
236 */
237 public static final char MAX_VALUE = '\uFFFF';
238
239 /**
240 * The {@code Class} instance representing the primitive type
241 * {@code char}.
242 *
243 * @since 1.1
244 */
245 public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
246
247 /*
248 * Normative general types
249 */
250
251 /*
252 * General character types
253 */
254
255 /**
256 * General category "Cn" in the Unicode specification.
257 * @since 1.1
258 */
259 public static final byte UNASSIGNED = 0;
260
261 /**
262 * General category "Lu" in the Unicode specification.
263 * @since 1.1
264 */
265 public static final byte UPPERCASE_LETTER = 1;
266
267 /**
268 * General category "Ll" in the Unicode specification.
269 * @since 1.1
270 */
271 public static final byte LOWERCASE_LETTER = 2;
272
273 /**
274 * General category "Lt" in the Unicode specification.
275 * @since 1.1
276 */
277 public static final byte TITLECASE_LETTER = 3;
278
279 /**
280 * General category "Lm" in the Unicode specification.
281 * @since 1.1
282 */
283 public static final byte MODIFIER_LETTER = 4;
284
285 /**
286 * General category "Lo" in the Unicode specification.
287 * @since 1.1
288 */
289 public static final byte OTHER_LETTER = 5;
290
291 /**
292 * General category "Mn" in the Unicode specification.
293 * @since 1.1
294 */
295 public static final byte NON_SPACING_MARK = 6;
296
297 /**
298 * General category "Me" in the Unicode specification.
299 * @since 1.1
300 */
301 public static final byte ENCLOSING_MARK = 7;
302
303 /**
304 * General category "Mc" in the Unicode specification.
305 * @since 1.1
306 */
307 public static final byte COMBINING_SPACING_MARK = 8;
308
309 /**
310 * General category "Nd" in the Unicode specification.
311 * @since 1.1
312 */
313 public static final byte DECIMAL_DIGIT_NUMBER = 9;
314
315 /**
316 * General category "Nl" in the Unicode specification.
317 * @since 1.1
318 */
319 public static final byte LETTER_NUMBER = 10;
320
321 /**
322 * General category "No" in the Unicode specification.
323 * @since 1.1
324 */
325 public static final byte OTHER_NUMBER = 11;
326
327 /**
328 * General category "Zs" in the Unicode specification.
329 * @since 1.1
330 */
331 public static final byte SPACE_SEPARATOR = 12;
332
333 /**
334 * General category "Zl" in the Unicode specification.
335 * @since 1.1
336 */
337 public static final byte LINE_SEPARATOR = 13;
338
339 /**
340 * General category "Zp" in the Unicode specification.
341 * @since 1.1
342 */
343 public static final byte PARAGRAPH_SEPARATOR = 14;
344
345 /**
346 * General category "Cc" in the Unicode specification.
347 * @since 1.1
348 */
349 public static final byte CONTROL = 15;
350
351 /**
352 * General category "Cf" in the Unicode specification.
353 * @since 1.1
354 */
355 public static final byte FORMAT = 16;
356
357 /**
358 * General category "Co" in the Unicode specification.
359 * @since 1.1
360 */
361 public static final byte PRIVATE_USE = 18;
362
363 /**
364 * General category "Cs" in the Unicode specification.
365 * @since 1.1
366 */
367 public static final byte SURROGATE = 19;
368
369 /**
370 * General category "Pd" in the Unicode specification.
371 * @since 1.1
372 */
373 public static final byte DASH_PUNCTUATION = 20;
374
375 /**
376 * General category "Ps" in the Unicode specification.
377 * @since 1.1
378 */
379 public static final byte START_PUNCTUATION = 21;
380
381 /**
382 * General category "Pe" in the Unicode specification.
383 * @since 1.1
384 */
385 public static final byte END_PUNCTUATION = 22;
386
387 /**
388 * General category "Pc" in the Unicode specification.
389 * @since 1.1
390 */
391 public static final byte CONNECTOR_PUNCTUATION = 23;
392
393 /**
394 * General category "Po" in the Unicode specification.
395 * @since 1.1
396 */
397 public static final byte OTHER_PUNCTUATION = 24;
398
399 /**
400 * General category "Sm" in the Unicode specification.
401 * @since 1.1
402 */
403 public static final byte MATH_SYMBOL = 25;
404
405 /**
406 * General category "Sc" in the Unicode specification.
407 * @since 1.1
408 */
409 public static final byte CURRENCY_SYMBOL = 26;
410
411 /**
412 * General category "Sk" in the Unicode specification.
413 * @since 1.1
414 */
415 public static final byte MODIFIER_SYMBOL = 27;
416
417 /**
418 * General category "So" in the Unicode specification.
419 * @since 1.1
420 */
421 public static final byte OTHER_SYMBOL = 28;
422
423 /**
424 * General category "Pi" in the Unicode specification.
425 * @since 1.4
426 */
427 public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
428
429 /**
430 * General category "Pf" in the Unicode specification.
431 * @since 1.4
432 */
433 public static final byte FINAL_QUOTE_PUNCTUATION = 30;
434
435 /**
436 * Error flag. Use int (code point) to avoid confusion with U+FFFF.
437 */
438 static final int ERROR = 0xFFFFFFFF;
439
440
441 /**
442 * Undefined bidirectional character type. Undefined {@code char}
443 * values have undefined directionality in the Unicode specification.
444 * @since 1.4
445 */
446 public static final byte DIRECTIONALITY_UNDEFINED = -1;
447
448 /**
449 * Strong bidirectional character type "L" in the Unicode specification.
450 * @since 1.4
451 */
452 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
453
454 /**
455 * Strong bidirectional character type "R" in the Unicode specification.
456 * @since 1.4
457 */
458 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
459
460 /**
461 * Strong bidirectional character type "AL" in the Unicode specification.
462 * @since 1.4
463 */
464 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
465
466 /**
467 * Weak bidirectional character type "EN" in the Unicode specification.
468 * @since 1.4
469 */
470 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
471
472 /**
473 * Weak bidirectional character type "ES" in the Unicode specification.
474 * @since 1.4
475 */
476 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
477
478 /**
479 * Weak bidirectional character type "ET" in the Unicode specification.
480 * @since 1.4
481 */
482 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
483
484 /**
485 * Weak bidirectional character type "AN" in the Unicode specification.
486 * @since 1.4
487 */
488 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
489
490 /**
491 * Weak bidirectional character type "CS" in the Unicode specification.
492 * @since 1.4
493 */
494 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
495
496 /**
497 * Weak bidirectional character type "NSM" in the Unicode specification.
498 * @since 1.4
499 */
500 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
501
502 /**
503 * Weak bidirectional character type "BN" in the Unicode specification.
504 * @since 1.4
505 */
506 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
507
508 /**
509 * Neutral bidirectional character type "B" in the Unicode specification.
510 * @since 1.4
511 */
512 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
513
514 /**
515 * Neutral bidirectional character type "S" in the Unicode specification.
516 * @since 1.4
517 */
518 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
519
520 /**
521 * Neutral bidirectional character type "WS" in the Unicode specification.
522 * @since 1.4
523 */
524 public static final byte DIRECTIONALITY_WHITESPACE = 12;
525
526 /**
527 * Neutral bidirectional character type "ON" in the Unicode specification.
528 * @since 1.4
529 */
530 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
531
532 /**
533 * Strong bidirectional character type "LRE" in the Unicode specification.
534 * @since 1.4
535 */
536 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
537
538 /**
539 * Strong bidirectional character type "LRO" in the Unicode specification.
540 * @since 1.4
541 */
542 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
543
544 /**
545 * Strong bidirectional character type "RLE" in the Unicode specification.
546 * @since 1.4
547 */
548 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
549
550 /**
551 * Strong bidirectional character type "RLO" in the Unicode specification.
552 * @since 1.4
553 */
554 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
555
556 /**
557 * Weak bidirectional character type "PDF" in the Unicode specification.
558 * @since 1.4
559 */
560 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
561
562 /**
563 * Weak bidirectional character type "LRI" in the Unicode specification.
564 * @since 9
565 */
566 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19;
567
568 /**
569 * Weak bidirectional character type "RLI" in the Unicode specification.
570 * @since 9
571 */
572 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20;
573
574 /**
575 * Weak bidirectional character type "FSI" in the Unicode specification.
576 * @since 9
577 */
578 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21;
579
580 /**
581 * Weak bidirectional character type "PDI" in the Unicode specification.
582 * @since 9
583 */
584 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22;
585
586 /**
587 * The minimum value of a
588 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
589 * Unicode high-surrogate code unit</a>
590 * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
591 * A high-surrogate is also known as a <i>leading-surrogate</i>.
592 *
593 * @since 1.5
594 */
595 public static final char MIN_HIGH_SURROGATE = '\uD800';
596
597 /**
598 * The maximum value of a
599 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
600 * Unicode high-surrogate code unit</a>
601 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
602 * A high-surrogate is also known as a <i>leading-surrogate</i>.
603 *
604 * @since 1.5
605 */
606 public static final char MAX_HIGH_SURROGATE = '\uDBFF';
607
608 /**
609 * The minimum value of a
610 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
611 * Unicode low-surrogate code unit</a>
612 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
613 * A low-surrogate is also known as a <i>trailing-surrogate</i>.
614 *
615 * @since 1.5
616 */
617 public static final char MIN_LOW_SURROGATE = '\uDC00';
618
619 /**
620 * The maximum value of a
621 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
622 * Unicode low-surrogate code unit</a>
623 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
624 * A low-surrogate is also known as a <i>trailing-surrogate</i>.
625 *
626 * @since 1.5
627 */
628 public static final char MAX_LOW_SURROGATE = '\uDFFF';
629
630 /**
631 * The minimum value of a Unicode surrogate code unit in the
632 * UTF-16 encoding, constant {@code '\u005CuD800'}.
633 *
634 * @since 1.5
635 */
636 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
637
638 /**
639 * The maximum value of a Unicode surrogate code unit in the
640 * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
641 *
642 * @since 1.5
643 */
644 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
645
646 /**
647 * The minimum value of a
648 * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
649 * Unicode supplementary code point</a>, constant {@code U+10000}.
650 *
651 * @since 1.5
652 */
653 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
654
655 /**
656 * The minimum value of a
657 * <a href="http://www.unicode.org/glossary/#code_point">
658 * Unicode code point</a>, constant {@code U+0000}.
659 *
660 * @since 1.5
661 */
662 public static final int MIN_CODE_POINT = 0x000000;
663
664 /**
665 * The maximum value of a
666 * <a href="http://www.unicode.org/glossary/#code_point">
667 * Unicode code point</a>, constant {@code U+10FFFF}.
668 *
669 * @since 1.5
670 */
671 public static final int MAX_CODE_POINT = 0X10FFFF;
672
673 /**
674 * Returns an {@link Optional} containing the nominal descriptor for this
675 * instance.
676 *
677 * @return an {@link Optional} describing the {@linkplain Character} instance
678 * @since 15
679 */
680 @Override
681 public Optional<DynamicConstantDesc<Character>> describeConstable() {
682 return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value));
683 }
684
685 /**
686 * Instances of this class represent particular subsets of the Unicode
687 * character set. The only family of subsets defined in the
688 * {@code Character} class is {@link Character.UnicodeBlock}.
689 * Other portions of the Java API may define other subsets for their
690 * own purposes.
691 *
692 * @since 1.2
693 */
694 public static class Subset {
695
696 private String name;
697
698 /**
699 * Constructs a new {@code Subset} instance.
700 *
701 * @param name The name of this subset
702 * @throws NullPointerException if name is {@code null}
703 */
704 protected Subset(String name) {
705 if (name == null) {
706 throw new NullPointerException("name");
707 }
708 this.name = name;
709 }
710
711 /**
712 * Compares two {@code Subset} objects for equality.
713 * This method returns {@code true} if and only if
714 * {@code this} and the argument refer to the same
715 * object; since this method is {@code final}, this
716 * guarantee holds for all subclasses.
717 */
718 public final boolean equals(Object obj) {
719 return (this == obj);
720 }
721
722 /**
723 * Returns the standard hash code as defined by the
724 * {@link Object#hashCode} method. This method
725 * is {@code final} in order to ensure that the
726 * {@code equals} and {@code hashCode} methods will
727 * be consistent in all subclasses.
728 */
729 public final int hashCode() {
730 return super.hashCode();
731 }
732
733 /**
734 * Returns the name of this subset.
735 */
736 public final String toString() {
737 return name;
738 }
739 }
740
741 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
742 // for the latest specification of Unicode Blocks.
743
744 /**
745 * A family of character subsets representing the character blocks in the
746 * Unicode specification. Character blocks generally define characters
747 * used for a specific script or purpose. A character is contained by
748 * at most one Unicode block.
749 *
750 * @since 1.2
751 */
752 public static final class UnicodeBlock extends Subset {
753 /**
754 * NUM_ENTITIES should match the total number of UnicodeBlocks.
755 * It should be adjusted whenever the Unicode Character Database
756 * is upgraded.
757 */
758 private static final int NUM_ENTITIES = 782;
759 private static Map<String, UnicodeBlock> map = HashMap.newHashMap(NUM_ENTITIES);
760
/**
 * Builds a UnicodeBlock named {@code idName} and registers it in the
 * lookup map under that name.
 * The name must be the same as the block identifier.
 */
private UnicodeBlock(String idName) {
    super(idName);
    UnicodeBlock.map.put(idName, this);
}
769
/**
 * Builds a UnicodeBlock named {@code idName} and additionally
 * registers it in the lookup map under the single {@code alias}.
 */
private UnicodeBlock(String idName, String alias) {
    this(idName);
    UnicodeBlock.map.put(alias, this);
}
778
/**
 * Builds a UnicodeBlock named {@code idName} and additionally
 * registers it in the lookup map under every name in {@code aliases}.
 */
private UnicodeBlock(String idName, String... aliases) {
    this(idName);
    for (int i = 0; i < aliases.length; i++) {
        UnicodeBlock.map.put(aliases[i], this);
    }
}
788
789 /**
790 * Constant for the "Basic Latin" Unicode character block.
791 * @since 1.2
792 */
793 public static final UnicodeBlock BASIC_LATIN =
794 new UnicodeBlock("BASIC_LATIN",
795 "BASIC LATIN",
796 "BASICLATIN");
797
798 /**
799 * Constant for the "Latin-1 Supplement" Unicode character block.
800 * @since 1.2
801 */
802 public static final UnicodeBlock LATIN_1_SUPPLEMENT =
803 new UnicodeBlock("LATIN_1_SUPPLEMENT",
804 "LATIN-1 SUPPLEMENT",
805 "LATIN-1SUPPLEMENT");
806
807 /**
808 * Constant for the "Latin Extended-A" Unicode character block.
809 * @since 1.2
810 */
811 public static final UnicodeBlock LATIN_EXTENDED_A =
812 new UnicodeBlock("LATIN_EXTENDED_A",
813 "LATIN EXTENDED-A",
814 "LATINEXTENDED-A");
815
816 /**
817 * Constant for the "Latin Extended-B" Unicode character block.
818 * @since 1.2
819 */
820 public static final UnicodeBlock LATIN_EXTENDED_B =
821 new UnicodeBlock("LATIN_EXTENDED_B",
822 "LATIN EXTENDED-B",
823 "LATINEXTENDED-B");
824
825 /**
826 * Constant for the "IPA Extensions" Unicode character block.
827 * @since 1.2
828 */
829 public static final UnicodeBlock IPA_EXTENSIONS =
830 new UnicodeBlock("IPA_EXTENSIONS",
831 "IPA EXTENSIONS",
832 "IPAEXTENSIONS");
833
834 /**
835 * Constant for the "Spacing Modifier Letters" Unicode character block.
836 * @since 1.2
837 */
838 public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
839 new UnicodeBlock("SPACING_MODIFIER_LETTERS",
840 "SPACING MODIFIER LETTERS",
841 "SPACINGMODIFIERLETTERS");
842
843 /**
844 * Constant for the "Combining Diacritical Marks" Unicode character block.
845 * @since 1.2
846 */
847 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
848 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
849 "COMBINING DIACRITICAL MARKS",
850 "COMBININGDIACRITICALMARKS");
851
852 /**
853 * Constant for the "Greek and Coptic" Unicode character block.
854 * <p>
855 * This block was previously known as the "Greek" block.
856 *
857 * @since 1.2
858 */
859 public static final UnicodeBlock GREEK =
860 new UnicodeBlock("GREEK",
861 "GREEK AND COPTIC",
862 "GREEKANDCOPTIC");
863
864 /**
865 * Constant for the "Cyrillic" Unicode character block.
866 * @since 1.2
867 */
868 public static final UnicodeBlock CYRILLIC =
869 new UnicodeBlock("CYRILLIC");
870
871 /**
872 * Constant for the "Armenian" Unicode character block.
873 * @since 1.2
874 */
875 public static final UnicodeBlock ARMENIAN =
876 new UnicodeBlock("ARMENIAN");
877
878 /**
879 * Constant for the "Hebrew" Unicode character block.
880 * @since 1.2
881 */
882 public static final UnicodeBlock HEBREW =
883 new UnicodeBlock("HEBREW");
884
885 /**
886 * Constant for the "Arabic" Unicode character block.
887 * @since 1.2
888 */
889 public static final UnicodeBlock ARABIC =
890 new UnicodeBlock("ARABIC");
891
892 /**
893 * Constant for the "Devanagari" Unicode character block.
894 * @since 1.2
895 */
896 public static final UnicodeBlock DEVANAGARI =
897 new UnicodeBlock("DEVANAGARI");
898
899 /**
900 * Constant for the "Bengali" Unicode character block.
901 * @since 1.2
902 */
903 public static final UnicodeBlock BENGALI =
904 new UnicodeBlock("BENGALI");
905
906 /**
907 * Constant for the "Gurmukhi" Unicode character block.
908 * @since 1.2
909 */
910 public static final UnicodeBlock GURMUKHI =
911 new UnicodeBlock("GURMUKHI");
912
913 /**
914 * Constant for the "Gujarati" Unicode character block.
915 * @since 1.2
916 */
917 public static final UnicodeBlock GUJARATI =
918 new UnicodeBlock("GUJARATI");
919
920 /**
921 * Constant for the "Oriya" Unicode character block.
922 * @since 1.2
923 */
924 public static final UnicodeBlock ORIYA =
925 new UnicodeBlock("ORIYA");
926
927 /**
928 * Constant for the "Tamil" Unicode character block.
929 * @since 1.2
930 */
931 public static final UnicodeBlock TAMIL =
932 new UnicodeBlock("TAMIL");
933
934 /**
935 * Constant for the "Telugu" Unicode character block.
936 * @since 1.2
937 */
938 public static final UnicodeBlock TELUGU =
939 new UnicodeBlock("TELUGU");
940
941 /**
942 * Constant for the "Kannada" Unicode character block.
943 * @since 1.2
944 */
945 public static final UnicodeBlock KANNADA =
946 new UnicodeBlock("KANNADA");
947
948 /**
949 * Constant for the "Malayalam" Unicode character block.
950 * @since 1.2
951 */
952 public static final UnicodeBlock MALAYALAM =
953 new UnicodeBlock("MALAYALAM");
954
955 /**
956 * Constant for the "Thai" Unicode character block.
957 * @since 1.2
958 */
959 public static final UnicodeBlock THAI =
960 new UnicodeBlock("THAI");
961
962 /**
963 * Constant for the "Lao" Unicode character block.
964 * @since 1.2
965 */
966 public static final UnicodeBlock LAO =
967 new UnicodeBlock("LAO");
968
969 /**
970 * Constant for the "Tibetan" Unicode character block.
971 * @since 1.2
972 */
973 public static final UnicodeBlock TIBETAN =
974 new UnicodeBlock("TIBETAN");
975
976 /**
977 * Constant for the "Georgian" Unicode character block.
978 * @since 1.2
979 */
980 public static final UnicodeBlock GEORGIAN =
981 new UnicodeBlock("GEORGIAN");
982
983 /**
984 * Constant for the "Hangul Jamo" Unicode character block.
985 * @since 1.2
986 */
987 public static final UnicodeBlock HANGUL_JAMO =
988 new UnicodeBlock("HANGUL_JAMO",
989 "HANGUL JAMO",
990 "HANGULJAMO");
991
992 /**
993 * Constant for the "Latin Extended Additional" Unicode character block.
994 * @since 1.2
995 */
996 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
997 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
998 "LATIN EXTENDED ADDITIONAL",
999 "LATINEXTENDEDADDITIONAL");
1000
1001 /**
1002 * Constant for the "Greek Extended" Unicode character block.
1003 * @since 1.2
1004 */
1005 public static final UnicodeBlock GREEK_EXTENDED =
1006 new UnicodeBlock("GREEK_EXTENDED",
1007 "GREEK EXTENDED",
1008 "GREEKEXTENDED");
1009
1010 /**
1011 * Constant for the "General Punctuation" Unicode character block.
1012 * @since 1.2
1013 */
1014 public static final UnicodeBlock GENERAL_PUNCTUATION =
1015 new UnicodeBlock("GENERAL_PUNCTUATION",
1016 "GENERAL PUNCTUATION",
1017 "GENERALPUNCTUATION");
1018
1019 /**
1020 * Constant for the "Superscripts and Subscripts" Unicode character
1021 * block.
1022 * @since 1.2
1023 */
1024 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
1025 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
1026 "SUPERSCRIPTS AND SUBSCRIPTS",
1027 "SUPERSCRIPTSANDSUBSCRIPTS");
1028
1029 /**
1030 * Constant for the "Currency Symbols" Unicode character block.
1031 * @since 1.2
1032 */
1033 public static final UnicodeBlock CURRENCY_SYMBOLS =
1034 new UnicodeBlock("CURRENCY_SYMBOLS",
1035 "CURRENCY SYMBOLS",
1036 "CURRENCYSYMBOLS");
1037
1038 /**
1039 * Constant for the "Combining Diacritical Marks for Symbols" Unicode
1040 * character block.
1041 * <p>
1042 * This block was previously known as "Combining Marks for Symbols".
1043 * @since 1.2
1044 */
1045 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
1046 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
1047 "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
1048 "COMBININGDIACRITICALMARKSFORSYMBOLS",
1049 "COMBINING MARKS FOR SYMBOLS",
1050 "COMBININGMARKSFORSYMBOLS");
1051
1052 /**
1053 * Constant for the "Letterlike Symbols" Unicode character block.
1054 * @since 1.2
1055 */
1056 public static final UnicodeBlock LETTERLIKE_SYMBOLS =
1057 new UnicodeBlock("LETTERLIKE_SYMBOLS",
1058 "LETTERLIKE SYMBOLS",
1059 "LETTERLIKESYMBOLS");
1060
1061 /**
1062 * Constant for the "Number Forms" Unicode character block.
1063 * @since 1.2
1064 */
1065 public static final UnicodeBlock NUMBER_FORMS =
1066 new UnicodeBlock("NUMBER_FORMS",
1067 "NUMBER FORMS",
1068 "NUMBERFORMS");
1069
1070 /**
1071 * Constant for the "Arrows" Unicode character block.
1072 * @since 1.2
1073 */
1074 public static final UnicodeBlock ARROWS =
1075 new UnicodeBlock("ARROWS");
1076
1077 /**
1078 * Constant for the "Mathematical Operators" Unicode character block.
1079 * @since 1.2
1080 */
1081 public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1082 new UnicodeBlock("MATHEMATICAL_OPERATORS",
1083 "MATHEMATICAL OPERATORS",
1084 "MATHEMATICALOPERATORS");
1085
1086 /**
1087 * Constant for the "Miscellaneous Technical" Unicode character block.
1088 * @since 1.2
1089 */
1090 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1091 new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1092 "MISCELLANEOUS TECHNICAL",
1093 "MISCELLANEOUSTECHNICAL");
1094
1095 /**
1096 * Constant for the "Control Pictures" Unicode character block.
1097 * @since 1.2
1098 */
1099 public static final UnicodeBlock CONTROL_PICTURES =
1100 new UnicodeBlock("CONTROL_PICTURES",
1101 "CONTROL PICTURES",
1102 "CONTROLPICTURES");
1103
1104 /**
1105 * Constant for the "Optical Character Recognition" Unicode character block.
1106 * @since 1.2
1107 */
1108 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1109 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1110 "OPTICAL CHARACTER RECOGNITION",
1111 "OPTICALCHARACTERRECOGNITION");
1112
1113 /**
1114 * Constant for the "Enclosed Alphanumerics" Unicode character block.
1115 * @since 1.2
1116 */
1117 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1118 new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1119 "ENCLOSED ALPHANUMERICS",
1120 "ENCLOSEDALPHANUMERICS");
1121
1122 /**
1123 * Constant for the "Box Drawing" Unicode character block.
1124 * @since 1.2
1125 */
1126 public static final UnicodeBlock BOX_DRAWING =
1127 new UnicodeBlock("BOX_DRAWING",
1128 "BOX DRAWING",
1129 "BOXDRAWING");
1130
1131 /**
1132 * Constant for the "Block Elements" Unicode character block.
1133 * @since 1.2
1134 */
1135 public static final UnicodeBlock BLOCK_ELEMENTS =
1136 new UnicodeBlock("BLOCK_ELEMENTS",
1137 "BLOCK ELEMENTS",
1138 "BLOCKELEMENTS");
1139
1140 /**
1141 * Constant for the "Geometric Shapes" Unicode character block.
1142 * @since 1.2
1143 */
1144 public static final UnicodeBlock GEOMETRIC_SHAPES =
1145 new UnicodeBlock("GEOMETRIC_SHAPES",
1146 "GEOMETRIC SHAPES",
1147 "GEOMETRICSHAPES");
1148
1149 /**
1150 * Constant for the "Miscellaneous Symbols" Unicode character block.
1151 * @since 1.2
1152 */
1153 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1154 new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1155 "MISCELLANEOUS SYMBOLS",
1156 "MISCELLANEOUSSYMBOLS");
1157
1158 /**
1159 * Constant for the "Dingbats" Unicode character block.
1160 * @since 1.2
1161 */
1162 public static final UnicodeBlock DINGBATS =
1163 new UnicodeBlock("DINGBATS");
1164
1165 /**
1166 * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1167 * @since 1.2
1168 */
1169 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1170 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1171 "CJK SYMBOLS AND PUNCTUATION",
1172 "CJKSYMBOLSANDPUNCTUATION");
1173
1174 /**
1175 * Constant for the "Hiragana" Unicode character block.
1176 * @since 1.2
1177 */
1178 public static final UnicodeBlock HIRAGANA =
1179 new UnicodeBlock("HIRAGANA");
1180
1181 /**
1182 * Constant for the "Katakana" Unicode character block.
1183 * @since 1.2
1184 */
1185 public static final UnicodeBlock KATAKANA =
1186 new UnicodeBlock("KATAKANA");
1187
1188 /**
1189 * Constant for the "Bopomofo" Unicode character block.
1190 * @since 1.2
1191 */
1192 public static final UnicodeBlock BOPOMOFO =
1193 new UnicodeBlock("BOPOMOFO");
1194
1195 /**
1196 * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1197 * @since 1.2
1198 */
1199 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1200 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1201 "HANGUL COMPATIBILITY JAMO",
1202 "HANGULCOMPATIBILITYJAMO");
1203
1204 /**
1205 * Constant for the "Kanbun" Unicode character block.
1206 * @since 1.2
1207 */
1208 public static final UnicodeBlock KANBUN =
1209 new UnicodeBlock("KANBUN");
1210
1211 /**
1212 * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1213 * @since 1.2
1214 */
1215 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1216 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1217 "ENCLOSED CJK LETTERS AND MONTHS",
1218 "ENCLOSEDCJKLETTERSANDMONTHS");
1219
1220 /**
1221 * Constant for the "CJK Compatibility" Unicode character block.
1222 * @since 1.2
1223 */
1224 public static final UnicodeBlock CJK_COMPATIBILITY =
1225 new UnicodeBlock("CJK_COMPATIBILITY",
1226 "CJK COMPATIBILITY",
1227 "CJKCOMPATIBILITY");
1228
1229 /**
1230 * Constant for the "CJK Unified Ideographs" Unicode character block.
1231 * @since 1.2
1232 */
1233 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1234 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1235 "CJK UNIFIED IDEOGRAPHS",
1236 "CJKUNIFIEDIDEOGRAPHS");
1237
1238 /**
1239 * Constant for the "Hangul Syllables" Unicode character block.
1240 * @since 1.2
1241 */
1242 public static final UnicodeBlock HANGUL_SYLLABLES =
1243 new UnicodeBlock("HANGUL_SYLLABLES",
1244 "HANGUL SYLLABLES",
1245 "HANGULSYLLABLES");
1246
1247 /**
1248 * Constant for the "Private Use Area" Unicode character block.
1249 * @since 1.2
1250 */
1251 public static final UnicodeBlock PRIVATE_USE_AREA =
1252 new UnicodeBlock("PRIVATE_USE_AREA",
1253 "PRIVATE USE AREA",
1254 "PRIVATEUSEAREA");
1255
1256 /**
1257 * Constant for the "CJK Compatibility Ideographs" Unicode character
1258 * block.
1259 * @since 1.2
1260 */
1261 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1262 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1263 "CJK COMPATIBILITY IDEOGRAPHS",
1264 "CJKCOMPATIBILITYIDEOGRAPHS");
1265
1266 /**
1267 * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1268 * @since 1.2
1269 */
1270 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1271 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1272 "ALPHABETIC PRESENTATION FORMS",
1273 "ALPHABETICPRESENTATIONFORMS");
1274
1275 /**
1276 * Constant for the "Arabic Presentation Forms-A" Unicode character
1277 * block.
1278 * @since 1.2
1279 */
1280 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1281 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1282 "ARABIC PRESENTATION FORMS-A",
1283 "ARABICPRESENTATIONFORMS-A");
1284
1285 /**
1286 * Constant for the "Combining Half Marks" Unicode character block.
1287 * @since 1.2
1288 */
1289 public static final UnicodeBlock COMBINING_HALF_MARKS =
1290 new UnicodeBlock("COMBINING_HALF_MARKS",
1291 "COMBINING HALF MARKS",
1292 "COMBININGHALFMARKS");
1293
1294 /**
1295 * Constant for the "CJK Compatibility Forms" Unicode character block.
1296 * @since 1.2
1297 */
1298 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1299 new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1300 "CJK COMPATIBILITY FORMS",
1301 "CJKCOMPATIBILITYFORMS");
1302
1303 /**
1304 * Constant for the "Small Form Variants" Unicode character block.
1305 * @since 1.2
1306 */
1307 public static final UnicodeBlock SMALL_FORM_VARIANTS =
1308 new UnicodeBlock("SMALL_FORM_VARIANTS",
1309 "SMALL FORM VARIANTS",
1310 "SMALLFORMVARIANTS");
1311
1312 /**
1313 * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1314 * @since 1.2
1315 */
1316 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1317 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1318 "ARABIC PRESENTATION FORMS-B",
1319 "ARABICPRESENTATIONFORMS-B");
1320
1321 /**
1322 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1323 * block.
1324 * @since 1.2
1325 */
1326 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1327 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1328 "HALFWIDTH AND FULLWIDTH FORMS",
1329 "HALFWIDTHANDFULLWIDTHFORMS");
1330
1331 /**
1332 * Constant for the "Specials" Unicode character block.
1333 * @since 1.2
1334 */
1335 public static final UnicodeBlock SPECIALS =
1336 new UnicodeBlock("SPECIALS");
1337
1338 /**
1339 * @deprecated
1340 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES},
1341 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}.
1342 * These constants match the block definitions of the Unicode Standard.
1343 * The {@link #of(char)} and {@link #of(int)} methods return the
1344 * standard constants.
1345 */
1346 @Deprecated(since="1.5")
1347 public static final UnicodeBlock SURROGATES_AREA =
1348 new UnicodeBlock("SURROGATES_AREA");
1349
1350 /**
1351 * Constant for the "Syriac" Unicode character block.
1352 * @since 1.4
1353 */
1354 public static final UnicodeBlock SYRIAC =
1355 new UnicodeBlock("SYRIAC");
1356
1357 /**
1358 * Constant for the "Thaana" Unicode character block.
1359 * @since 1.4
1360 */
1361 public static final UnicodeBlock THAANA =
1362 new UnicodeBlock("THAANA");
1363
1364 /**
1365 * Constant for the "Sinhala" Unicode character block.
1366 * @since 1.4
1367 */
1368 public static final UnicodeBlock SINHALA =
1369 new UnicodeBlock("SINHALA");
1370
1371 /**
1372 * Constant for the "Myanmar" Unicode character block.
1373 * @since 1.4
1374 */
1375 public static final UnicodeBlock MYANMAR =
1376 new UnicodeBlock("MYANMAR");
1377
1378 /**
1379 * Constant for the "Ethiopic" Unicode character block.
1380 * @since 1.4
1381 */
1382 public static final UnicodeBlock ETHIOPIC =
1383 new UnicodeBlock("ETHIOPIC");
1384
1385 /**
1386 * Constant for the "Cherokee" Unicode character block.
1387 * @since 1.4
1388 */
1389 public static final UnicodeBlock CHEROKEE =
1390 new UnicodeBlock("CHEROKEE");
1391
1392 /**
1393 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1394 * @since 1.4
1395 */
1396 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1397 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1398 "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1399 "UNIFIEDCANADIANABORIGINALSYLLABICS");
1400
1401 /**
1402 * Constant for the "Ogham" Unicode character block.
1403 * @since 1.4
1404 */
1405 public static final UnicodeBlock OGHAM =
1406 new UnicodeBlock("OGHAM");
1407
1408 /**
1409 * Constant for the "Runic" Unicode character block.
1410 * @since 1.4
1411 */
1412 public static final UnicodeBlock RUNIC =
1413 new UnicodeBlock("RUNIC");
1414
1415 /**
1416 * Constant for the "Khmer" Unicode character block.
1417 * @since 1.4
1418 */
1419 public static final UnicodeBlock KHMER =
1420 new UnicodeBlock("KHMER");
1421
1422 /**
1423 * Constant for the "Mongolian" Unicode character block.
1424 * @since 1.4
1425 */
1426 public static final UnicodeBlock MONGOLIAN =
1427 new UnicodeBlock("MONGOLIAN");
1428
1429 /**
1430 * Constant for the "Braille Patterns" Unicode character block.
1431 * @since 1.4
1432 */
1433 public static final UnicodeBlock BRAILLE_PATTERNS =
1434 new UnicodeBlock("BRAILLE_PATTERNS",
1435 "BRAILLE PATTERNS",
1436 "BRAILLEPATTERNS");
1437
1438 /**
1439 * Constant for the "CJK Radicals Supplement" Unicode character block.
1440 * @since 1.4
1441 */
1442 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1443 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1444 "CJK RADICALS SUPPLEMENT",
1445 "CJKRADICALSSUPPLEMENT");
1446
1447 /**
1448 * Constant for the "Kangxi Radicals" Unicode character block.
1449 * @since 1.4
1450 */
1451 public static final UnicodeBlock KANGXI_RADICALS =
1452 new UnicodeBlock("KANGXI_RADICALS",
1453 "KANGXI RADICALS",
1454 "KANGXIRADICALS");
1455
1456 /**
1457 * Constant for the "Ideographic Description Characters" Unicode character block.
1458 * @since 1.4
1459 */
1460 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1461 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1462 "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1463 "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1464
1465 /**
1466 * Constant for the "Bopomofo Extended" Unicode character block.
1467 * @since 1.4
1468 */
1469 public static final UnicodeBlock BOPOMOFO_EXTENDED =
1470 new UnicodeBlock("BOPOMOFO_EXTENDED",
1471 "BOPOMOFO EXTENDED",
1472 "BOPOMOFOEXTENDED");
1473
1474 /**
1475 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1476 * @since 1.4
1477 */
1478 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1479 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1480 "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1481 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1482
1483 /**
1484 * Constant for the "Yi Syllables" Unicode character block.
1485 * @since 1.4
1486 */
1487 public static final UnicodeBlock YI_SYLLABLES =
1488 new UnicodeBlock("YI_SYLLABLES",
1489 "YI SYLLABLES",
1490 "YISYLLABLES");
1491
1492 /**
1493 * Constant for the "Yi Radicals" Unicode character block.
1494 * @since 1.4
1495 */
1496 public static final UnicodeBlock YI_RADICALS =
1497 new UnicodeBlock("YI_RADICALS",
1498 "YI RADICALS",
1499 "YIRADICALS");
1500
1501 /**
1502 * Constant for the "Cyrillic Supplement" Unicode character block.
1503 * This block was previously known as the "Cyrillic Supplementary" block.
1504 * @since 1.5
1505 */
1506 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1507 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1508 "CYRILLIC SUPPLEMENTARY",
1509 "CYRILLICSUPPLEMENTARY",
1510 "CYRILLIC SUPPLEMENT",
1511 "CYRILLICSUPPLEMENT");
1512
1513 /**
1514 * Constant for the "Tagalog" Unicode character block.
1515 * @since 1.5
1516 */
1517 public static final UnicodeBlock TAGALOG =
1518 new UnicodeBlock("TAGALOG");
1519
1520 /**
1521 * Constant for the "Hanunoo" Unicode character block.
1522 * @since 1.5
1523 */
1524 public static final UnicodeBlock HANUNOO =
1525 new UnicodeBlock("HANUNOO");
1526
1527 /**
1528 * Constant for the "Buhid" Unicode character block.
1529 * @since 1.5
1530 */
1531 public static final UnicodeBlock BUHID =
1532 new UnicodeBlock("BUHID");
1533
1534 /**
1535 * Constant for the "Tagbanwa" Unicode character block.
1536 * @since 1.5
1537 */
1538 public static final UnicodeBlock TAGBANWA =
1539 new UnicodeBlock("TAGBANWA");
1540
1541 /**
1542 * Constant for the "Limbu" Unicode character block.
1543 * @since 1.5
1544 */
1545 public static final UnicodeBlock LIMBU =
1546 new UnicodeBlock("LIMBU");
1547
1548 /**
1549 * Constant for the "Tai Le" Unicode character block.
1550 * @since 1.5
1551 */
1552 public static final UnicodeBlock TAI_LE =
1553 new UnicodeBlock("TAI_LE",
1554 "TAI LE",
1555 "TAILE");
1556
1557 /**
1558 * Constant for the "Khmer Symbols" Unicode character block.
1559 * @since 1.5
1560 */
1561 public static final UnicodeBlock KHMER_SYMBOLS =
1562 new UnicodeBlock("KHMER_SYMBOLS",
1563 "KHMER SYMBOLS",
1564 "KHMERSYMBOLS");
1565
1566 /**
1567 * Constant for the "Phonetic Extensions" Unicode character block.
1568 * @since 1.5
1569 */
1570 public static final UnicodeBlock PHONETIC_EXTENSIONS =
1571 new UnicodeBlock("PHONETIC_EXTENSIONS",
1572 "PHONETIC EXTENSIONS",
1573 "PHONETICEXTENSIONS");
1574
1575 /**
1576 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1577 * @since 1.5
1578 */
1579 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1580 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1581 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1582 "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1583
1584 /**
1585 * Constant for the "Supplemental Arrows-A" Unicode character block.
1586 * @since 1.5
1587 */
1588 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1589 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1590 "SUPPLEMENTAL ARROWS-A",
1591 "SUPPLEMENTALARROWS-A");
1592
1593 /**
1594 * Constant for the "Supplemental Arrows-B" Unicode character block.
1595 * @since 1.5
1596 */
1597 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1598 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1599 "SUPPLEMENTAL ARROWS-B",
1600 "SUPPLEMENTALARROWS-B");
1601
1602 /**
1603 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1604 * character block.
1605 * @since 1.5
1606 */
1607 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1608 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1609 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1610 "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1611
1612 /**
1613 * Constant for the "Supplemental Mathematical Operators" Unicode
1614 * character block.
1615 * @since 1.5
1616 */
1617 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1618 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1619 "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1620 "SUPPLEMENTALMATHEMATICALOPERATORS");
1621
1622 /**
1623 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1624 * block.
1625 * @since 1.5
1626 */
1627 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1628 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1629 "MISCELLANEOUS SYMBOLS AND ARROWS",
1630 "MISCELLANEOUSSYMBOLSANDARROWS");
1631
1632 /**
1633 * Constant for the "Katakana Phonetic Extensions" Unicode character
1634 * block.
1635 * @since 1.5
1636 */
1637 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1638 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1639 "KATAKANA PHONETIC EXTENSIONS",
1640 "KATAKANAPHONETICEXTENSIONS");
1641
1642 /**
1643 * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1644 * @since 1.5
1645 */
1646 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1647 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1648 "YIJING HEXAGRAM SYMBOLS",
1649 "YIJINGHEXAGRAMSYMBOLS");
1650
1651 /**
1652 * Constant for the "Variation Selectors" Unicode character block.
1653 * @since 1.5
1654 */
1655 public static final UnicodeBlock VARIATION_SELECTORS =
1656 new UnicodeBlock("VARIATION_SELECTORS",
1657 "VARIATION SELECTORS",
1658 "VARIATIONSELECTORS");
1659
1660 /**
1661 * Constant for the "Linear B Syllabary" Unicode character block.
1662 * @since 1.5
1663 */
1664 public static final UnicodeBlock LINEAR_B_SYLLABARY =
1665 new UnicodeBlock("LINEAR_B_SYLLABARY",
1666 "LINEAR B SYLLABARY",
1667 "LINEARBSYLLABARY");
1668
1669 /**
1670 * Constant for the "Linear B Ideograms" Unicode character block.
1671 * @since 1.5
1672 */
1673 public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1674 new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1675 "LINEAR B IDEOGRAMS",
1676 "LINEARBIDEOGRAMS");
1677
1678 /**
1679 * Constant for the "Aegean Numbers" Unicode character block.
1680 * @since 1.5
1681 */
1682 public static final UnicodeBlock AEGEAN_NUMBERS =
1683 new UnicodeBlock("AEGEAN_NUMBERS",
1684 "AEGEAN NUMBERS",
1685 "AEGEANNUMBERS");
1686
1687 /**
1688 * Constant for the "Old Italic" Unicode character block.
1689 * @since 1.5
1690 */
1691 public static final UnicodeBlock OLD_ITALIC =
1692 new UnicodeBlock("OLD_ITALIC",
1693 "OLD ITALIC",
1694 "OLDITALIC");
1695
1696 /**
1697 * Constant for the "Gothic" Unicode character block.
1698 * @since 1.5
1699 */
1700 public static final UnicodeBlock GOTHIC =
1701 new UnicodeBlock("GOTHIC");
1702
1703 /**
1704 * Constant for the "Ugaritic" Unicode character block.
1705 * @since 1.5
1706 */
1707 public static final UnicodeBlock UGARITIC =
1708 new UnicodeBlock("UGARITIC");
1709
1710 /**
1711 * Constant for the "Deseret" Unicode character block.
1712 * @since 1.5
1713 */
1714 public static final UnicodeBlock DESERET =
1715 new UnicodeBlock("DESERET");
1716
1717 /**
1718 * Constant for the "Shavian" Unicode character block.
1719 * @since 1.5
1720 */
1721 public static final UnicodeBlock SHAVIAN =
1722 new UnicodeBlock("SHAVIAN");
1723
1724 /**
1725 * Constant for the "Osmanya" Unicode character block.
1726 * @since 1.5
1727 */
1728 public static final UnicodeBlock OSMANYA =
1729 new UnicodeBlock("OSMANYA");
1730
1731 /**
1732 * Constant for the "Cypriot Syllabary" Unicode character block.
1733 * @since 1.5
1734 */
1735 public static final UnicodeBlock CYPRIOT_SYLLABARY =
1736 new UnicodeBlock("CYPRIOT_SYLLABARY",
1737 "CYPRIOT SYLLABARY",
1738 "CYPRIOTSYLLABARY");
1739
1740 /**
1741 * Constant for the "Byzantine Musical Symbols" Unicode character block.
1742 * @since 1.5
1743 */
1744 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1745 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1746 "BYZANTINE MUSICAL SYMBOLS",
1747 "BYZANTINEMUSICALSYMBOLS");
1748
1749 /**
1750 * Constant for the "Musical Symbols" Unicode character block.
1751 * @since 1.5
1752 */
1753 public static final UnicodeBlock MUSICAL_SYMBOLS =
1754 new UnicodeBlock("MUSICAL_SYMBOLS",
1755 "MUSICAL SYMBOLS",
1756 "MUSICALSYMBOLS");
1757
1758 /**
1759 * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1760 * @since 1.5
1761 */
1762 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1763 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1764 "TAI XUAN JING SYMBOLS",
1765 "TAIXUANJINGSYMBOLS");
1766
1767 /**
1768 * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1769 * character block.
1770 * @since 1.5
1771 */
1772 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1773 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1774 "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1775 "MATHEMATICALALPHANUMERICSYMBOLS");
1776
1777 /**
1778 * Constant for the "CJK Unified Ideographs Extension B" Unicode
1779 * character block.
1780 * @since 1.5
1781 */
1782 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1783 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1784 "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1785 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1786
1787 /**
1788 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1789 * @since 1.5
1790 */
1791 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1792 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1793 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1794 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1795
1796 /**
1797 * Constant for the "Tags" Unicode character block.
1798 * @since 1.5
1799 */
1800 public static final UnicodeBlock TAGS =
1801 new UnicodeBlock("TAGS");
1802
1803 /**
1804 * Constant for the "Variation Selectors Supplement" Unicode character
1805 * block.
1806 * @since 1.5
1807 */
1808 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1809 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1810 "VARIATION SELECTORS SUPPLEMENT",
1811 "VARIATIONSELECTORSSUPPLEMENT");
1812
1813 /**
1814 * Constant for the "Supplementary Private Use Area-A" Unicode character
1815 * block.
1816 * @since 1.5
1817 */
1818 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1819 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1820 "SUPPLEMENTARY PRIVATE USE AREA-A",
1821 "SUPPLEMENTARYPRIVATEUSEAREA-A");
1822
1823 /**
1824 * Constant for the "Supplementary Private Use Area-B" Unicode character
1825 * block.
1826 * @since 1.5
1827 */
1828 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1829 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1830 "SUPPLEMENTARY PRIVATE USE AREA-B",
1831 "SUPPLEMENTARYPRIVATEUSEAREA-B");
1832
1833 /**
1834 * Constant for the "High Surrogates" Unicode character block.
1835 * This block represents codepoint values in the high surrogate
1836 * range: U+D800 through U+DB7F
1837 *
1838 * @since 1.5
1839 */
1840 public static final UnicodeBlock HIGH_SURROGATES =
1841 new UnicodeBlock("HIGH_SURROGATES",
1842 "HIGH SURROGATES",
1843 "HIGHSURROGATES");
1844
1845 /**
1846 * Constant for the "High Private Use Surrogates" Unicode character
1847 * block.
1848 * This block represents codepoint values in the private use high
1849 * surrogate range: U+DB80 through U+DBFF
1850 *
1851 * @since 1.5
1852 */
1853 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1854 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1855 "HIGH PRIVATE USE SURROGATES",
1856 "HIGHPRIVATEUSESURROGATES");
1857
1858 /**
1859 * Constant for the "Low Surrogates" Unicode character block.
1860 * This block represents codepoint values in the low surrogate
1861 * range: U+DC00 through U+DFFF
1862 *
1863 * @since 1.5
1864 */
1865 public static final UnicodeBlock LOW_SURROGATES =
1866 new UnicodeBlock("LOW_SURROGATES",
1867 "LOW SURROGATES",
1868 "LOWSURROGATES");
1869
1870 /**
1871 * Constant for the "Arabic Supplement" Unicode character block.
1872 * @since 1.7
1873 */
1874 public static final UnicodeBlock ARABIC_SUPPLEMENT =
1875 new UnicodeBlock("ARABIC_SUPPLEMENT",
1876 "ARABIC SUPPLEMENT",
1877 "ARABICSUPPLEMENT");
1878
1879 /**
1880 * Constant for the "NKo" Unicode character block.
1881 * @since 1.7
1882 */
1883 public static final UnicodeBlock NKO =
1884 new UnicodeBlock("NKO");
1885
1886 /**
1887 * Constant for the "Samaritan" Unicode character block.
1888 * @since 1.7
1889 */
1890 public static final UnicodeBlock SAMARITAN =
1891 new UnicodeBlock("SAMARITAN");
1892
1893 /**
1894 * Constant for the "Mandaic" Unicode character block.
1895 * @since 1.7
1896 */
1897 public static final UnicodeBlock MANDAIC =
1898 new UnicodeBlock("MANDAIC");
1899
1900 /**
1901 * Constant for the "Ethiopic Supplement" Unicode character block.
1902 * @since 1.7
1903 */
1904 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1905 new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1906 "ETHIOPIC SUPPLEMENT",
1907 "ETHIOPICSUPPLEMENT");
1908
1909 /**
1910 * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1911 * Unicode character block.
1912 * @since 1.7
1913 */
1914 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1915 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1916 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1917 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1918
1919 /**
1920 * Constant for the "New Tai Lue" Unicode character block.
1921 * @since 1.7
1922 */
1923 public static final UnicodeBlock NEW_TAI_LUE =
1924 new UnicodeBlock("NEW_TAI_LUE",
1925 "NEW TAI LUE",
1926 "NEWTAILUE");
1927
1928 /**
1929 * Constant for the "Buginese" Unicode character block.
1930 * @since 1.7
1931 */
1932 public static final UnicodeBlock BUGINESE =
1933 new UnicodeBlock("BUGINESE");
1934
1935 /**
1936 * Constant for the "Tai Tham" Unicode character block.
1937 * @since 1.7
1938 */
1939 public static final UnicodeBlock TAI_THAM =
1940 new UnicodeBlock("TAI_THAM",
1941 "TAI THAM",
1942 "TAITHAM");
1943
1944 /**
1945 * Constant for the "Balinese" Unicode character block.
1946 * @since 1.7
1947 */
1948 public static final UnicodeBlock BALINESE =
1949 new UnicodeBlock("BALINESE");
1950
1951 /**
1952 * Constant for the "Sundanese" Unicode character block.
1953 * @since 1.7
1954 */
1955 public static final UnicodeBlock SUNDANESE =
1956 new UnicodeBlock("SUNDANESE");
1957
1958 /**
1959 * Constant for the "Batak" Unicode character block.
1960 * @since 1.7
1961 */
1962 public static final UnicodeBlock BATAK =
1963 new UnicodeBlock("BATAK");
1964
1965 /**
1966 * Constant for the "Lepcha" Unicode character block.
1967 * @since 1.7
1968 */
1969 public static final UnicodeBlock LEPCHA =
1970 new UnicodeBlock("LEPCHA");
1971
1972 /**
1973 * Constant for the "Ol Chiki" Unicode character block.
1974 * @since 1.7
1975 */
1976 public static final UnicodeBlock OL_CHIKI =
1977 new UnicodeBlock("OL_CHIKI",
1978 "OL CHIKI",
1979 "OLCHIKI");
1980
1981 /**
1982 * Constant for the "Vedic Extensions" Unicode character block.
1983 * @since 1.7
1984 */
1985 public static final UnicodeBlock VEDIC_EXTENSIONS =
1986 new UnicodeBlock("VEDIC_EXTENSIONS",
1987 "VEDIC EXTENSIONS",
1988 "VEDICEXTENSIONS");
1989
1990 /**
1991 * Constant for the "Phonetic Extensions Supplement" Unicode character
1992 * block.
1993 * @since 1.7
1994 */
1995 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1996 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1997 "PHONETIC EXTENSIONS SUPPLEMENT",
1998 "PHONETICEXTENSIONSSUPPLEMENT");
1999
2000 /**
2001 * Constant for the "Combining Diacritical Marks Supplement" Unicode
2002 * character block.
2003 * @since 1.7
2004 */
2005 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
2006 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
2007 "COMBINING DIACRITICAL MARKS SUPPLEMENT",
2008 "COMBININGDIACRITICALMARKSSUPPLEMENT");
2009
2010 /**
2011 * Constant for the "Glagolitic" Unicode character block.
2012 * @since 1.7
2013 */
2014 public static final UnicodeBlock GLAGOLITIC =
2015 new UnicodeBlock("GLAGOLITIC");
2016
2017 /**
2018 * Constant for the "Latin Extended-C" Unicode character block.
2019 * @since 1.7
2020 */
2021 public static final UnicodeBlock LATIN_EXTENDED_C =
2022 new UnicodeBlock("LATIN_EXTENDED_C",
2023 "LATIN EXTENDED-C",
2024 "LATINEXTENDED-C");
2025
2026 /**
2027 * Constant for the "Coptic" Unicode character block.
2028 * @since 1.7
2029 */
2030 public static final UnicodeBlock COPTIC =
2031 new UnicodeBlock("COPTIC");
2032
2033 /**
2034 * Constant for the "Georgian Supplement" Unicode character block.
2035 * @since 1.7
2036 */
2037 public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
2038 new UnicodeBlock("GEORGIAN_SUPPLEMENT",
2039 "GEORGIAN SUPPLEMENT",
2040 "GEORGIANSUPPLEMENT");
2041
2042 /**
2043 * Constant for the "Tifinagh" Unicode character block.
2044 * @since 1.7
2045 */
2046 public static final UnicodeBlock TIFINAGH =
2047 new UnicodeBlock("TIFINAGH");
2048
2049 /**
2050 * Constant for the "Ethiopic Extended" Unicode character block.
2051 * @since 1.7
2052 */
2053 public static final UnicodeBlock ETHIOPIC_EXTENDED =
2054 new UnicodeBlock("ETHIOPIC_EXTENDED",
2055 "ETHIOPIC EXTENDED",
2056 "ETHIOPICEXTENDED");
2057
2058 /**
2059 * Constant for the "Cyrillic Extended-A" Unicode character block.
2060 * @since 1.7
2061 */
2062 public static final UnicodeBlock CYRILLIC_EXTENDED_A =
2063 new UnicodeBlock("CYRILLIC_EXTENDED_A",
2064 "CYRILLIC EXTENDED-A",
2065 "CYRILLICEXTENDED-A");
2066
2067 /**
2068 * Constant for the "Supplemental Punctuation" Unicode character block.
2069 * @since 1.7
2070 */
2071 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
2072 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
2073 "SUPPLEMENTAL PUNCTUATION",
2074 "SUPPLEMENTALPUNCTUATION");
2075
2076 /**
2077 * Constant for the "CJK Strokes" Unicode character block.
2078 * @since 1.7
2079 */
2080 public static final UnicodeBlock CJK_STROKES =
2081 new UnicodeBlock("CJK_STROKES",
2082 "CJK STROKES",
2083 "CJKSTROKES");
2084
2085 /**
2086 * Constant for the "Lisu" Unicode character block.
2087 * @since 1.7
2088 */
2089 public static final UnicodeBlock LISU =
2090 new UnicodeBlock("LISU");
2091
2092 /**
2093 * Constant for the "Vai" Unicode character block.
2094 * @since 1.7
2095 */
2096 public static final UnicodeBlock VAI =
2097 new UnicodeBlock("VAI");
2098
2099 /**
2100 * Constant for the "Cyrillic Extended-B" Unicode character block.
2101 * @since 1.7
2102 */
2103 public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2104 new UnicodeBlock("CYRILLIC_EXTENDED_B",
2105 "CYRILLIC EXTENDED-B",
2106 "CYRILLICEXTENDED-B");
2107
2108 /**
2109 * Constant for the "Bamum" Unicode character block.
2110 * @since 1.7
2111 */
2112 public static final UnicodeBlock BAMUM =
2113 new UnicodeBlock("BAMUM");
2114
2115 /**
2116 * Constant for the "Modifier Tone Letters" Unicode character block.
2117 * @since 1.7
2118 */
2119 public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2120 new UnicodeBlock("MODIFIER_TONE_LETTERS",
2121 "MODIFIER TONE LETTERS",
2122 "MODIFIERTONELETTERS");
2123
2124 /**
2125 * Constant for the "Latin Extended-D" Unicode character block.
2126 * @since 1.7
2127 */
2128 public static final UnicodeBlock LATIN_EXTENDED_D =
2129 new UnicodeBlock("LATIN_EXTENDED_D",
2130 "LATIN EXTENDED-D",
2131 "LATINEXTENDED-D");
2132
2133 /**
2134 * Constant for the "Syloti Nagri" Unicode character block.
2135 * @since 1.7
2136 */
2137 public static final UnicodeBlock SYLOTI_NAGRI =
2138 new UnicodeBlock("SYLOTI_NAGRI",
2139 "SYLOTI NAGRI",
2140 "SYLOTINAGRI");
2141
2142 /**
2143 * Constant for the "Common Indic Number Forms" Unicode character block.
2144 * @since 1.7
2145 */
2146 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2147 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2148 "COMMON INDIC NUMBER FORMS",
2149 "COMMONINDICNUMBERFORMS");
2150
2151 /**
2152 * Constant for the "Phags-pa" Unicode character block.
2153 * @since 1.7
2154 */
2155 public static final UnicodeBlock PHAGS_PA =
2156 new UnicodeBlock("PHAGS_PA",
2157 "PHAGS-PA");
2158
2159 /**
2160 * Constant for the "Saurashtra" Unicode character block.
2161 * @since 1.7
2162 */
2163 public static final UnicodeBlock SAURASHTRA =
2164 new UnicodeBlock("SAURASHTRA");
2165
2166 /**
2167 * Constant for the "Devanagari Extended" Unicode character block.
2168 * @since 1.7
2169 */
2170 public static final UnicodeBlock DEVANAGARI_EXTENDED =
2171 new UnicodeBlock("DEVANAGARI_EXTENDED",
2172 "DEVANAGARI EXTENDED",
2173 "DEVANAGARIEXTENDED");
2174
2175 /**
2176 * Constant for the "Kayah Li" Unicode character block.
2177 * @since 1.7
2178 */
2179 public static final UnicodeBlock KAYAH_LI =
2180 new UnicodeBlock("KAYAH_LI",
2181 "KAYAH LI",
2182 "KAYAHLI");
2183
2184 /**
2185 * Constant for the "Rejang" Unicode character block.
2186 * @since 1.7
2187 */
2188 public static final UnicodeBlock REJANG =
2189 new UnicodeBlock("REJANG");
2190
2191 /**
2192 * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2193 * @since 1.7
2194 */
2195 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2196 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2197 "HANGUL JAMO EXTENDED-A",
2198 "HANGULJAMOEXTENDED-A");
2199
2200 /**
2201 * Constant for the "Javanese" Unicode character block.
2202 * @since 1.7
2203 */
2204 public static final UnicodeBlock JAVANESE =
2205 new UnicodeBlock("JAVANESE");
2206
2207 /**
2208 * Constant for the "Cham" Unicode character block.
2209 * @since 1.7
2210 */
2211 public static final UnicodeBlock CHAM =
2212 new UnicodeBlock("CHAM");
2213
2214 /**
2215 * Constant for the "Myanmar Extended-A" Unicode character block.
2216 * @since 1.7
2217 */
2218 public static final UnicodeBlock MYANMAR_EXTENDED_A =
2219 new UnicodeBlock("MYANMAR_EXTENDED_A",
2220 "MYANMAR EXTENDED-A",
2221 "MYANMAREXTENDED-A");
2222
2223 /**
2224 * Constant for the "Tai Viet" Unicode character block.
2225 * @since 1.7
2226 */
2227 public static final UnicodeBlock TAI_VIET =
2228 new UnicodeBlock("TAI_VIET",
2229 "TAI VIET",
2230 "TAIVIET");
2231
2232 /**
2233 * Constant for the "Ethiopic Extended-A" Unicode character block.
2234 * @since 1.7
2235 */
2236 public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2237 new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2238 "ETHIOPIC EXTENDED-A",
2239 "ETHIOPICEXTENDED-A");
2240
2241 /**
2242 * Constant for the "Meetei Mayek" Unicode character block.
2243 * @since 1.7
2244 */
2245 public static final UnicodeBlock MEETEI_MAYEK =
2246 new UnicodeBlock("MEETEI_MAYEK",
2247 "MEETEI MAYEK",
2248 "MEETEIMAYEK");
2249
2250 /**
2251 * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2252 * @since 1.7
2253 */
2254 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2255 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2256 "HANGUL JAMO EXTENDED-B",
2257 "HANGULJAMOEXTENDED-B");
2258
2259 /**
2260 * Constant for the "Vertical Forms" Unicode character block.
2261 * @since 1.7
2262 */
2263 public static final UnicodeBlock VERTICAL_FORMS =
2264 new UnicodeBlock("VERTICAL_FORMS",
2265 "VERTICAL FORMS",
2266 "VERTICALFORMS");
2267
2268 /**
2269 * Constant for the "Ancient Greek Numbers" Unicode character block.
2270 * @since 1.7
2271 */
2272 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2273 new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2274 "ANCIENT GREEK NUMBERS",
2275 "ANCIENTGREEKNUMBERS");
2276
2277 /**
2278 * Constant for the "Ancient Symbols" Unicode character block.
2279 * @since 1.7
2280 */
2281 public static final UnicodeBlock ANCIENT_SYMBOLS =
2282 new UnicodeBlock("ANCIENT_SYMBOLS",
2283 "ANCIENT SYMBOLS",
2284 "ANCIENTSYMBOLS");
2285
2286 /**
2287 * Constant for the "Phaistos Disc" Unicode character block.
2288 * @since 1.7
2289 */
2290 public static final UnicodeBlock PHAISTOS_DISC =
2291 new UnicodeBlock("PHAISTOS_DISC",
2292 "PHAISTOS DISC",
2293 "PHAISTOSDISC");
2294
2295 /**
2296 * Constant for the "Lycian" Unicode character block.
2297 * @since 1.7
2298 */
2299 public static final UnicodeBlock LYCIAN =
2300 new UnicodeBlock("LYCIAN");
2301
2302 /**
2303 * Constant for the "Carian" Unicode character block.
2304 * @since 1.7
2305 */
2306 public static final UnicodeBlock CARIAN =
2307 new UnicodeBlock("CARIAN");
2308
2309 /**
2310 * Constant for the "Old Persian" Unicode character block.
2311 * @since 1.7
2312 */
2313 public static final UnicodeBlock OLD_PERSIAN =
2314 new UnicodeBlock("OLD_PERSIAN",
2315 "OLD PERSIAN",
2316 "OLDPERSIAN");
2317
2318 /**
2319 * Constant for the "Imperial Aramaic" Unicode character block.
2320 * @since 1.7
2321 */
2322 public static final UnicodeBlock IMPERIAL_ARAMAIC =
2323 new UnicodeBlock("IMPERIAL_ARAMAIC",
2324 "IMPERIAL ARAMAIC",
2325 "IMPERIALARAMAIC");
2326
2327 /**
2328 * Constant for the "Phoenician" Unicode character block.
2329 * @since 1.7
2330 */
2331 public static final UnicodeBlock PHOENICIAN =
2332 new UnicodeBlock("PHOENICIAN");
2333
2334 /**
2335 * Constant for the "Lydian" Unicode character block.
2336 * @since 1.7
2337 */
2338 public static final UnicodeBlock LYDIAN =
2339 new UnicodeBlock("LYDIAN");
2340
2341 /**
2342 * Constant for the "Kharoshthi" Unicode character block.
2343 * @since 1.7
2344 */
2345 public static final UnicodeBlock KHAROSHTHI =
2346 new UnicodeBlock("KHAROSHTHI");
2347
2348 /**
2349 * Constant for the "Old South Arabian" Unicode character block.
2350 * @since 1.7
2351 */
2352 public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2353 new UnicodeBlock("OLD_SOUTH_ARABIAN",
2354 "OLD SOUTH ARABIAN",
2355 "OLDSOUTHARABIAN");
2356
2357 /**
2358 * Constant for the "Avestan" Unicode character block.
2359 * @since 1.7
2360 */
2361 public static final UnicodeBlock AVESTAN =
2362 new UnicodeBlock("AVESTAN");
2363
2364 /**
2365 * Constant for the "Inscriptional Parthian" Unicode character block.
2366 * @since 1.7
2367 */
2368 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2369 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2370 "INSCRIPTIONAL PARTHIAN",
2371 "INSCRIPTIONALPARTHIAN");
2372
2373 /**
2374 * Constant for the "Inscriptional Pahlavi" Unicode character block.
2375 * @since 1.7
2376 */
2377 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2378 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2379 "INSCRIPTIONAL PAHLAVI",
2380 "INSCRIPTIONALPAHLAVI");
2381
2382 /**
2383 * Constant for the "Old Turkic" Unicode character block.
2384 * @since 1.7
2385 */
2386 public static final UnicodeBlock OLD_TURKIC =
2387 new UnicodeBlock("OLD_TURKIC",
2388 "OLD TURKIC",
2389 "OLDTURKIC");
2390
2391 /**
2392 * Constant for the "Rumi Numeral Symbols" Unicode character block.
2393 * @since 1.7
2394 */
2395 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2396 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2397 "RUMI NUMERAL SYMBOLS",
2398 "RUMINUMERALSYMBOLS");
2399
2400 /**
2401 * Constant for the "Brahmi" Unicode character block.
2402 * @since 1.7
2403 */
2404 public static final UnicodeBlock BRAHMI =
2405 new UnicodeBlock("BRAHMI");
2406
2407 /**
2408 * Constant for the "Kaithi" Unicode character block.
2409 * @since 1.7
2410 */
2411 public static final UnicodeBlock KAITHI =
2412 new UnicodeBlock("KAITHI");
2413
2414 /**
2415 * Constant for the "Cuneiform" Unicode character block.
2416 * @since 1.7
2417 */
2418 public static final UnicodeBlock CUNEIFORM =
2419 new UnicodeBlock("CUNEIFORM");
2420
2421 /**
2422 * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2423 * character block.
2424 * @since 1.7
2425 */
2426 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2427 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2428 "CUNEIFORM NUMBERS AND PUNCTUATION",
2429 "CUNEIFORMNUMBERSANDPUNCTUATION");
2430
2431 /**
2432 * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2433 * @since 1.7
2434 */
2435 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2436 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2437 "EGYPTIAN HIEROGLYPHS",
2438 "EGYPTIANHIEROGLYPHS");
2439
2440 /**
2441 * Constant for the "Bamum Supplement" Unicode character block.
2442 * @since 1.7
2443 */
2444 public static final UnicodeBlock BAMUM_SUPPLEMENT =
2445 new UnicodeBlock("BAMUM_SUPPLEMENT",
2446 "BAMUM SUPPLEMENT",
2447 "BAMUMSUPPLEMENT");
2448
2449 /**
2450 * Constant for the "Kana Supplement" Unicode character block.
2451 * @since 1.7
2452 */
2453 public static final UnicodeBlock KANA_SUPPLEMENT =
2454 new UnicodeBlock("KANA_SUPPLEMENT",
2455 "KANA SUPPLEMENT",
2456 "KANASUPPLEMENT");
2457
2458 /**
2459 * Constant for the "Ancient Greek Musical Notation" Unicode character
2460 * block.
2461 * @since 1.7
2462 */
2463 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2464 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2465 "ANCIENT GREEK MUSICAL NOTATION",
2466 "ANCIENTGREEKMUSICALNOTATION");
2467
2468 /**
2469 * Constant for the "Counting Rod Numerals" Unicode character block.
2470 * @since 1.7
2471 */
2472 public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2473 new UnicodeBlock("COUNTING_ROD_NUMERALS",
2474 "COUNTING ROD NUMERALS",
2475 "COUNTINGRODNUMERALS");
2476
2477 /**
2478 * Constant for the "Mahjong Tiles" Unicode character block.
2479 * @since 1.7
2480 */
2481 public static final UnicodeBlock MAHJONG_TILES =
2482 new UnicodeBlock("MAHJONG_TILES",
2483 "MAHJONG TILES",
2484 "MAHJONGTILES");
2485
2486 /**
2487 * Constant for the "Domino Tiles" Unicode character block.
2488 * @since 1.7
2489 */
2490 public static final UnicodeBlock DOMINO_TILES =
2491 new UnicodeBlock("DOMINO_TILES",
2492 "DOMINO TILES",
2493 "DOMINOTILES");
2494
2495 /**
2496 * Constant for the "Playing Cards" Unicode character block.
2497 * @since 1.7
2498 */
2499 public static final UnicodeBlock PLAYING_CARDS =
2500 new UnicodeBlock("PLAYING_CARDS",
2501 "PLAYING CARDS",
2502 "PLAYINGCARDS");
2503
2504 /**
2505 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2506 * block.
2507 * @since 1.7
2508 */
2509 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2510 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2511 "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2512 "ENCLOSEDALPHANUMERICSUPPLEMENT");
2513
2514 /**
2515 * Constant for the "Enclosed Ideographic Supplement" Unicode character
2516 * block.
2517 * @since 1.7
2518 */
2519 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2520 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2521 "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2522 "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2523
2524 /**
2525 * Constant for the "Miscellaneous Symbols and Pictographs" Unicode
2526 * character block.
2527 * @since 1.7
2528 */
2529 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2530 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2531 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2532 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2533
2534 /**
2535 * Constant for the "Emoticons" Unicode character block.
2536 * @since 1.7
2537 */
2538 public static final UnicodeBlock EMOTICONS =
2539 new UnicodeBlock("EMOTICONS");
2540
2541 /**
2542 * Constant for the "Transport and Map Symbols" Unicode character block.
2543 * @since 1.7
2544 */
2545 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2546 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2547 "TRANSPORT AND MAP SYMBOLS",
2548 "TRANSPORTANDMAPSYMBOLS");
2549
2550 /**
2551 * Constant for the "Alchemical Symbols" Unicode character block.
2552 * @since 1.7
2553 */
2554 public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2555 new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2556 "ALCHEMICAL SYMBOLS",
2557 "ALCHEMICALSYMBOLS");
2558
2559 /**
2560 * Constant for the "CJK Unified Ideographs Extension C" Unicode
2561 * character block.
2562 * @since 1.7
2563 */
2564 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2565 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2566 "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2567 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2568
2569 /**
2570 * Constant for the "CJK Unified Ideographs Extension D" Unicode
2571 * character block.
2572 * @since 1.7
2573 */
2574 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2575 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2576 "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2577 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2578
2579 /**
2580 * Constant for the "Arabic Extended-A" Unicode character block.
2581 * @since 1.8
2582 */
2583 public static final UnicodeBlock ARABIC_EXTENDED_A =
2584 new UnicodeBlock("ARABIC_EXTENDED_A",
2585 "ARABIC EXTENDED-A",
2586 "ARABICEXTENDED-A");
2587
2588 /**
2589 * Constant for the "Sundanese Supplement" Unicode character block.
2590 * @since 1.8
2591 */
2592 public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2593 new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2594 "SUNDANESE SUPPLEMENT",
2595 "SUNDANESESUPPLEMENT");
2596
2597 /**
2598 * Constant for the "Meetei Mayek Extensions" Unicode character block.
2599 * @since 1.8
2600 */
2601 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2602 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2603 "MEETEI MAYEK EXTENSIONS",
2604 "MEETEIMAYEKEXTENSIONS");
2605
2606 /**
2607 * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2608 * @since 1.8
2609 */
2610 public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2611 new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2612 "MEROITIC HIEROGLYPHS",
2613 "MEROITICHIEROGLYPHS");
2614
2615 /**
2616 * Constant for the "Meroitic Cursive" Unicode character block.
2617 * @since 1.8
2618 */
2619 public static final UnicodeBlock MEROITIC_CURSIVE =
2620 new UnicodeBlock("MEROITIC_CURSIVE",
2621 "MEROITIC CURSIVE",
2622 "MEROITICCURSIVE");
2623
2624 /**
2625 * Constant for the "Sora Sompeng" Unicode character block.
2626 * @since 1.8
2627 */
2628 public static final UnicodeBlock SORA_SOMPENG =
2629 new UnicodeBlock("SORA_SOMPENG",
2630 "SORA SOMPENG",
2631 "SORASOMPENG");
2632
2633 /**
2634 * Constant for the "Chakma" Unicode character block.
2635 * @since 1.8
2636 */
2637 public static final UnicodeBlock CHAKMA =
2638 new UnicodeBlock("CHAKMA");
2639
2640 /**
2641 * Constant for the "Sharada" Unicode character block.
2642 * @since 1.8
2643 */
2644 public static final UnicodeBlock SHARADA =
2645 new UnicodeBlock("SHARADA");
2646
2647 /**
2648 * Constant for the "Takri" Unicode character block.
2649 * @since 1.8
2650 */
2651 public static final UnicodeBlock TAKRI =
2652 new UnicodeBlock("TAKRI");
2653
2654 /**
2655 * Constant for the "Miao" Unicode character block.
2656 * @since 1.8
2657 */
2658 public static final UnicodeBlock MIAO =
2659 new UnicodeBlock("MIAO");
2660
2661 /**
2662 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2663 * character block.
2664 * @since 1.8
2665 */
2666 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2667 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2668 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2669 "ARABICMATHEMATICALALPHABETICSYMBOLS");
2670
2671 /**
2672 * Constant for the "Combining Diacritical Marks Extended" Unicode
2673 * character block.
2674 * @since 9
2675 */
2676 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2677 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED",
2678 "COMBINING DIACRITICAL MARKS EXTENDED",
2679 "COMBININGDIACRITICALMARKSEXTENDED");
2680
2681 /**
2682 * Constant for the "Myanmar Extended-B" Unicode character block.
2683 * @since 9
2684 */
2685 public static final UnicodeBlock MYANMAR_EXTENDED_B =
2686 new UnicodeBlock("MYANMAR_EXTENDED_B",
2687 "MYANMAR EXTENDED-B",
2688 "MYANMAREXTENDED-B");
2689
2690 /**
2691 * Constant for the "Latin Extended-E" Unicode character block.
2692 * @since 9
2693 */
2694 public static final UnicodeBlock LATIN_EXTENDED_E =
2695 new UnicodeBlock("LATIN_EXTENDED_E",
2696 "LATIN EXTENDED-E",
2697 "LATINEXTENDED-E");
2698
2699 /**
2700 * Constant for the "Coptic Epact Numbers" Unicode character block.
2701 * @since 9
2702 */
2703 public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2704 new UnicodeBlock("COPTIC_EPACT_NUMBERS",
2705 "COPTIC EPACT NUMBERS",
2706 "COPTICEPACTNUMBERS");
2707
2708 /**
2709 * Constant for the "Old Permic" Unicode character block.
2710 * @since 9
2711 */
2712 public static final UnicodeBlock OLD_PERMIC =
2713 new UnicodeBlock("OLD_PERMIC",
2714 "OLD PERMIC",
2715 "OLDPERMIC");
2716
2717 /**
2718 * Constant for the "Elbasan" Unicode character block.
2719 * @since 9
2720 */
2721 public static final UnicodeBlock ELBASAN =
2722 new UnicodeBlock("ELBASAN");
2723
2724 /**
2725 * Constant for the "Caucasian Albanian" Unicode character block.
2726 * @since 9
2727 */
2728 public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2729 new UnicodeBlock("CAUCASIAN_ALBANIAN",
2730 "CAUCASIAN ALBANIAN",
2731 "CAUCASIANALBANIAN");
2732
2733 /**
2734 * Constant for the "Linear A" Unicode character block.
2735 * @since 9
2736 */
2737 public static final UnicodeBlock LINEAR_A =
2738 new UnicodeBlock("LINEAR_A",
2739 "LINEAR A",
2740 "LINEARA");
2741
2742 /**
2743 * Constant for the "Palmyrene" Unicode character block.
2744 * @since 9
2745 */
2746 public static final UnicodeBlock PALMYRENE =
2747 new UnicodeBlock("PALMYRENE");
2748
2749 /**
2750 * Constant for the "Nabataean" Unicode character block.
2751 * @since 9
2752 */
2753 public static final UnicodeBlock NABATAEAN =
2754 new UnicodeBlock("NABATAEAN");
2755
2756 /**
2757 * Constant for the "Old North Arabian" Unicode character block.
2758 * @since 9
2759 */
2760 public static final UnicodeBlock OLD_NORTH_ARABIAN =
2761 new UnicodeBlock("OLD_NORTH_ARABIAN",
2762 "OLD NORTH ARABIAN",
2763 "OLDNORTHARABIAN");
2764
2765 /**
2766 * Constant for the "Manichaean" Unicode character block.
2767 * @since 9
2768 */
2769 public static final UnicodeBlock MANICHAEAN =
2770 new UnicodeBlock("MANICHAEAN");
2771
2772 /**
2773 * Constant for the "Psalter Pahlavi" Unicode character block.
2774 * @since 9
2775 */
2776 public static final UnicodeBlock PSALTER_PAHLAVI =
2777 new UnicodeBlock("PSALTER_PAHLAVI",
2778 "PSALTER PAHLAVI",
2779 "PSALTERPAHLAVI");
2780
2781 /**
2782 * Constant for the "Mahajani" Unicode character block.
2783 * @since 9
2784 */
2785 public static final UnicodeBlock MAHAJANI =
2786 new UnicodeBlock("MAHAJANI");
2787
2788 /**
2789 * Constant for the "Sinhala Archaic Numbers" Unicode character block.
2790 * @since 9
2791 */
2792 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2793 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS",
2794 "SINHALA ARCHAIC NUMBERS",
2795 "SINHALAARCHAICNUMBERS");
2796
2797 /**
2798 * Constant for the "Khojki" Unicode character block.
2799 * @since 9
2800 */
2801 public static final UnicodeBlock KHOJKI =
2802 new UnicodeBlock("KHOJKI");
2803
2804 /**
2805 * Constant for the "Khudawadi" Unicode character block.
2806 * @since 9
2807 */
2808 public static final UnicodeBlock KHUDAWADI =
2809 new UnicodeBlock("KHUDAWADI");
2810
2811 /**
2812 * Constant for the "Grantha" Unicode character block.
2813 * @since 9
2814 */
2815 public static final UnicodeBlock GRANTHA =
2816 new UnicodeBlock("GRANTHA");
2817
2818 /**
2819 * Constant for the "Tirhuta" Unicode character block.
2820 * @since 9
2821 */
2822 public static final UnicodeBlock TIRHUTA =
2823 new UnicodeBlock("TIRHUTA");
2824
2825 /**
2826 * Constant for the "Siddham" Unicode character block.
2827 * @since 9
2828 */
2829 public static final UnicodeBlock SIDDHAM =
2830 new UnicodeBlock("SIDDHAM");
2831
2832 /**
2833 * Constant for the "Modi" Unicode character block.
2834 * @since 9
2835 */
2836 public static final UnicodeBlock MODI =
2837 new UnicodeBlock("MODI");
2838
2839 /**
2840 * Constant for the "Warang Citi" Unicode character block.
2841 * @since 9
2842 */
2843 public static final UnicodeBlock WARANG_CITI =
2844 new UnicodeBlock("WARANG_CITI",
2845 "WARANG CITI",
2846 "WARANGCITI");
2847
2848 /**
2849 * Constant for the "Pau Cin Hau" Unicode character block.
2850 * @since 9
2851 */
2852 public static final UnicodeBlock PAU_CIN_HAU =
2853 new UnicodeBlock("PAU_CIN_HAU",
2854 "PAU CIN HAU",
2855 "PAUCINHAU");
2856
2857 /**
2858 * Constant for the "Mro" Unicode character block.
2859 * @since 9
2860 */
2861 public static final UnicodeBlock MRO =
2862 new UnicodeBlock("MRO");
2863
2864 /**
2865 * Constant for the "Bassa Vah" Unicode character block.
2866 * @since 9
2867 */
2868 public static final UnicodeBlock BASSA_VAH =
2869 new UnicodeBlock("BASSA_VAH",
2870 "BASSA VAH",
2871 "BASSAVAH");
2872
2873 /**
2874 * Constant for the "Pahawh Hmong" Unicode character block.
2875 * @since 9
2876 */
2877 public static final UnicodeBlock PAHAWH_HMONG =
2878 new UnicodeBlock("PAHAWH_HMONG",
2879 "PAHAWH HMONG",
2880 "PAHAWHHMONG");
2881
2882 /**
2883 * Constant for the "Duployan" Unicode character block.
2884 * @since 9
2885 */
2886 public static final UnicodeBlock DUPLOYAN =
2887 new UnicodeBlock("DUPLOYAN");
2888
2889 /**
2890 * Constant for the "Shorthand Format Controls" Unicode character block.
2891 * @since 9
2892 */
2893 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2894 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS",
2895 "SHORTHAND FORMAT CONTROLS",
2896 "SHORTHANDFORMATCONTROLS");
2897
2898 /**
2899 * Constant for the "Mende Kikakui" Unicode character block.
2900 * @since 9
2901 */
2902 public static final UnicodeBlock MENDE_KIKAKUI =
2903 new UnicodeBlock("MENDE_KIKAKUI",
2904 "MENDE KIKAKUI",
2905 "MENDEKIKAKUI");
2906
2907 /**
2908 * Constant for the "Ornamental Dingbats" Unicode character block.
2909 * @since 9
2910 */
2911 public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2912 new UnicodeBlock("ORNAMENTAL_DINGBATS",
2913 "ORNAMENTAL DINGBATS",
2914 "ORNAMENTALDINGBATS");
2915
2916 /**
2917 * Constant for the "Geometric Shapes Extended" Unicode character block.
2918 * @since 9
2919 */
2920 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2921 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED",
2922 "GEOMETRIC SHAPES EXTENDED",
2923 "GEOMETRICSHAPESEXTENDED");
2924
2925 /**
2926 * Constant for the "Supplemental Arrows-C" Unicode character block.
2927 * @since 9
2928 */
2929 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2930 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C",
2931 "SUPPLEMENTAL ARROWS-C",
2932 "SUPPLEMENTALARROWS-C");
2933
2934 /**
2935 * Constant for the "Cherokee Supplement" Unicode character block.
2936 * @since 9
2937 */
2938 public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2939 new UnicodeBlock("CHEROKEE_SUPPLEMENT",
2940 "CHEROKEE SUPPLEMENT",
2941 "CHEROKEESUPPLEMENT");
2942
2943 /**
2944 * Constant for the "Hatran" Unicode character block.
2945 * @since 9
2946 */
2947 public static final UnicodeBlock HATRAN =
2948 new UnicodeBlock("HATRAN");
2949
2950 /**
2951 * Constant for the "Old Hungarian" Unicode character block.
2952 * @since 9
2953 */
2954 public static final UnicodeBlock OLD_HUNGARIAN =
2955 new UnicodeBlock("OLD_HUNGARIAN",
2956 "OLD HUNGARIAN",
2957 "OLDHUNGARIAN");
2958
2959 /**
2960 * Constant for the "Multani" Unicode character block.
2961 * @since 9
2962 */
2963 public static final UnicodeBlock MULTANI =
2964 new UnicodeBlock("MULTANI");
2965
2966 /**
2967 * Constant for the "Ahom" Unicode character block.
2968 * @since 9
2969 */
2970 public static final UnicodeBlock AHOM =
2971 new UnicodeBlock("AHOM");
2972
2973 /**
2974 * Constant for the "Early Dynastic Cuneiform" Unicode character block.
2975 * @since 9
2976 */
2977 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
2978 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM",
2979 "EARLY DYNASTIC CUNEIFORM",
2980 "EARLYDYNASTICCUNEIFORM");
2981
2982 /**
2983 * Constant for the "Anatolian Hieroglyphs" Unicode character block.
2984 * @since 9
2985 */
2986 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
2987 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS",
2988 "ANATOLIAN HIEROGLYPHS",
2989 "ANATOLIANHIEROGLYPHS");
2990
2991 /**
2992 * Constant for the "Sutton SignWriting" Unicode character block.
2993 * @since 9
2994 */
2995 public static final UnicodeBlock SUTTON_SIGNWRITING =
2996 new UnicodeBlock("SUTTON_SIGNWRITING",
2997 "SUTTON SIGNWRITING",
2998 "SUTTONSIGNWRITING");
2999
3000 /**
3001 * Constant for the "Supplemental Symbols and Pictographs" Unicode
3002 * character block.
3003 * @since 9
3004 */
3005 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
3006 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
3007 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS",
3008 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS");
3009
3010 /**
3011 * Constant for the "CJK Unified Ideographs Extension E" Unicode
3012 * character block.
3013 * @since 9
3014 */
3015 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
3016 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
3017 "CJK UNIFIED IDEOGRAPHS EXTENSION E",
3018 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE");
3019
3020 /**
3021 * Constant for the "Syriac Supplement" Unicode
3022 * character block.
3023 * @since 11
3024 */
3025 public static final UnicodeBlock SYRIAC_SUPPLEMENT =
3026 new UnicodeBlock("SYRIAC_SUPPLEMENT",
3027 "SYRIAC SUPPLEMENT",
3028 "SYRIACSUPPLEMENT");
3029
3030 /**
3031 * Constant for the "Cyrillic Extended-C" Unicode
3032 * character block.
3033 * @since 11
3034 */
3035 public static final UnicodeBlock CYRILLIC_EXTENDED_C =
3036 new UnicodeBlock("CYRILLIC_EXTENDED_C",
3037 "CYRILLIC EXTENDED-C",
3038 "CYRILLICEXTENDED-C");
3039
3040 /**
3041 * Constant for the "Osage" Unicode
3042 * character block.
3043 * @since 11
3044 */
3045 public static final UnicodeBlock OSAGE =
3046 new UnicodeBlock("OSAGE");
3047
3048 /**
3049 * Constant for the "Newa" Unicode
3050 * character block.
3051 * @since 11
3052 */
3053 public static final UnicodeBlock NEWA =
3054 new UnicodeBlock("NEWA");
3055
3056 /**
3057 * Constant for the "Mongolian Supplement" Unicode
3058 * character block.
3059 * @since 11
3060 */
3061 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
3062 new UnicodeBlock("MONGOLIAN_SUPPLEMENT",
3063 "MONGOLIAN SUPPLEMENT",
3064 "MONGOLIANSUPPLEMENT");
3065
3066 /**
3067 * Constant for the "Marchen" Unicode
3068 * character block.
3069 * @since 11
3070 */
3071 public static final UnicodeBlock MARCHEN =
3072 new UnicodeBlock("MARCHEN");
3073
3074 /**
3075 * Constant for the "Ideographic Symbols and Punctuation" Unicode
3076 * character block.
3077 * @since 11
3078 */
3079 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
3080 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION",
3081 "IDEOGRAPHIC SYMBOLS AND PUNCTUATION",
3082 "IDEOGRAPHICSYMBOLSANDPUNCTUATION");
3083
3084 /**
3085 * Constant for the "Tangut" Unicode
3086 * character block.
3087 * @since 11
3088 */
3089 public static final UnicodeBlock TANGUT =
3090 new UnicodeBlock("TANGUT");
3091
3092 /**
3093 * Constant for the "Tangut Components" Unicode
3094 * character block.
3095 * @since 11
3096 */
3097 public static final UnicodeBlock TANGUT_COMPONENTS =
3098 new UnicodeBlock("TANGUT_COMPONENTS",
3099 "TANGUT COMPONENTS",
3100 "TANGUTCOMPONENTS");
3101
3102 /**
3103 * Constant for the "Kana Extended-A" Unicode
3104 * character block.
3105 * @since 11
3106 */
3107 public static final UnicodeBlock KANA_EXTENDED_A =
3108 new UnicodeBlock("KANA_EXTENDED_A",
3109 "KANA EXTENDED-A",
3110 "KANAEXTENDED-A");
3111 /**
3112 * Constant for the "Glagolitic Supplement" Unicode
3113 * character block.
3114 * @since 11
3115 */
3116 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
3117 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT",
3118 "GLAGOLITIC SUPPLEMENT",
3119 "GLAGOLITICSUPPLEMENT");
3120 /**
3121 * Constant for the "Adlam" Unicode
3122 * character block.
3123 * @since 11
3124 */
3125 public static final UnicodeBlock ADLAM =
3126 new UnicodeBlock("ADLAM");
3127
3128 /**
3129 * Constant for the "Masaram Gondi" Unicode
3130 * character block.
3131 * @since 11
3132 */
3133 public static final UnicodeBlock MASARAM_GONDI =
3134 new UnicodeBlock("MASARAM_GONDI",
3135 "MASARAM GONDI",
3136 "MASARAMGONDI");
3137
3138 /**
3139 * Constant for the "Zanabazar Square" Unicode
3140 * character block.
3141 * @since 11
3142 */
3143 public static final UnicodeBlock ZANABAZAR_SQUARE =
3144 new UnicodeBlock("ZANABAZAR_SQUARE",
3145 "ZANABAZAR SQUARE",
3146 "ZANABAZARSQUARE");
3147
3148 /**
3149 * Constant for the "Nushu" Unicode
3150 * character block.
3151 * @since 11
3152 */
3153 public static final UnicodeBlock NUSHU =
3154 new UnicodeBlock("NUSHU");
3155
3156 /**
3157 * Constant for the "Soyombo" Unicode
3158 * character block.
3159 * @since 11
3160 */
3161 public static final UnicodeBlock SOYOMBO =
3162 new UnicodeBlock("SOYOMBO");
3163
3164 /**
3165 * Constant for the "Bhaiksuki" Unicode
3166 * character block.
3167 * @since 11
3168 */
3169 public static final UnicodeBlock BHAIKSUKI =
3170 new UnicodeBlock("BHAIKSUKI");
3171
3172 /**
3173 * Constant for the "CJK Unified Ideographs Extension F" Unicode
3174 * character block.
3175 * @since 11
3176 */
3177 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
3178 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F",
3179 "CJK UNIFIED IDEOGRAPHS EXTENSION F",
3180 "CJKUNIFIEDIDEOGRAPHSEXTENSIONF");
3181 /**
3182 * Constant for the "Georgian Extended" Unicode
3183 * character block (U+1C90..U+1CBF).
3184 * @since 12
3185 */
3186 public static final UnicodeBlock GEORGIAN_EXTENDED =
3187 new UnicodeBlock("GEORGIAN_EXTENDED",
3188 "GEORGIAN EXTENDED",
3189 "GEORGIANEXTENDED");
3190
3191 /**
3192 * Constant for the "Hanifi Rohingya" Unicode
3193 * character block (U+10D00..U+10D3F).
3194 * @since 12
3195 */
3196 public static final UnicodeBlock HANIFI_ROHINGYA =
3197 new UnicodeBlock("HANIFI_ROHINGYA",
3198 "HANIFI ROHINGYA",
3199 "HANIFIROHINGYA");
3200
3201 /**
3202 * Constant for the "Old Sogdian" Unicode
3203 * character block (U+10F00..U+10F2F).
3204 * @since 12
3205 */
3206 public static final UnicodeBlock OLD_SOGDIAN =
3207 new UnicodeBlock("OLD_SOGDIAN",
3208 "OLD SOGDIAN",
3209 "OLDSOGDIAN");
3210
3211 /**
3212 * Constant for the "Sogdian" Unicode
3213 * character block (U+10F30..U+10F6F).
3214 * @since 12
3215 */
3216 public static final UnicodeBlock SOGDIAN =
3217 new UnicodeBlock("SOGDIAN");
3218
3219 /**
3220 * Constant for the "Dogra" Unicode
3221 * character block (U+11800..U+1184F).
3222 * @since 12
3223 */
3224 public static final UnicodeBlock DOGRA =
3225 new UnicodeBlock("DOGRA");
3226
3227 /**
3228 * Constant for the "Gunjala Gondi" Unicode
3229 * character block (U+11D60..U+11DAF).
3230 * @since 12
3231 */
3232 public static final UnicodeBlock GUNJALA_GONDI =
3233 new UnicodeBlock("GUNJALA_GONDI",
3234 "GUNJALA GONDI",
3235 "GUNJALAGONDI");
3236
3237 /**
3238 * Constant for the "Makasar" Unicode
3239 * character block (U+11EE0..U+11EFF).
3240 * @since 12
3241 */
3242 public static final UnicodeBlock MAKASAR =
3243 new UnicodeBlock("MAKASAR");
3244
3245 /**
3246 * Constant for the "Medefaidrin" Unicode
3247 * character block (U+16E40..U+16E9F).
3248 * @since 12
3249 */
3250 public static final UnicodeBlock MEDEFAIDRIN =
3251 new UnicodeBlock("MEDEFAIDRIN");
3252
3253 /**
3254 * Constant for the "Mayan Numerals" Unicode
3255 * character block (U+1D2E0..U+1D2FF).
3256 * @since 12
3257 */
3258 public static final UnicodeBlock MAYAN_NUMERALS =
3259 new UnicodeBlock("MAYAN_NUMERALS",
3260 "MAYAN NUMERALS",
3261 "MAYANNUMERALS");
3262
3263 /**
3264 * Constant for the "Indic Siyaq Numbers" Unicode
3265 * character block (U+1EC70..U+1ECBF).
3266 * @since 12
3267 */
3268 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
3269 new UnicodeBlock("INDIC_SIYAQ_NUMBERS",
3270 "INDIC SIYAQ NUMBERS",
3271 "INDICSIYAQNUMBERS");
3272
3273 /**
3274 * Constant for the "Chess Symbols" Unicode
3275 * character block (U+1FA00..U+1FA6F).
3276 * @since 12
3277 */
3278 public static final UnicodeBlock CHESS_SYMBOLS =
3279 new UnicodeBlock("CHESS_SYMBOLS",
3280 "CHESS SYMBOLS",
3281 "CHESSSYMBOLS");
3282
3283 /**
3284 * Constant for the "Elymaic" Unicode
3285 * character block (U+10FE0..U+10FFF).
3286 * @since 13
3287 */
3288 public static final UnicodeBlock ELYMAIC =
3289 new UnicodeBlock("ELYMAIC");
3290
3291 /**
3292 * Constant for the "Nandinagari" Unicode
3293 * character block (U+119A0..U+119FF).
3294 * @since 13
3295 */
3296 public static final UnicodeBlock NANDINAGARI =
3297 new UnicodeBlock("NANDINAGARI");
3298
3299 /**
3300 * Constant for the "Tamil Supplement" Unicode
3301 * character block (U+11FC0..U+11FFF).
3302 * @since 13
3303 */
3304 public static final UnicodeBlock TAMIL_SUPPLEMENT =
3305 new UnicodeBlock("TAMIL_SUPPLEMENT",
3306 "TAMIL SUPPLEMENT",
3307 "TAMILSUPPLEMENT");
3308
3309 /**
3310 * Constant for the "Egyptian Hieroglyph Format Controls" Unicode
3311 * character block (U+13430..U+1345F).
3312 * @since 13
3313 */
3314 public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
3315 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS",
3316 "EGYPTIAN HIEROGLYPH FORMAT CONTROLS",
3317 "EGYPTIANHIEROGLYPHFORMATCONTROLS");
3318
3319 /**
3320 * Constant for the "Small Kana Extension" Unicode
3321 * character block (U+1B130..U+1B16F).
3322 * @since 13
3323 */
3324 public static final UnicodeBlock SMALL_KANA_EXTENSION =
3325 new UnicodeBlock("SMALL_KANA_EXTENSION",
3326 "SMALL KANA EXTENSION",
3327 "SMALLKANAEXTENSION");
3328
3329 /**
3330 * Constant for the "Nyiakeng Puachue Hmong" Unicode
3331 * character block (U+1E100..U+1E14F).
3332 * @since 13
3333 */
3334 public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
3335 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG",
3336 "NYIAKENG PUACHUE HMONG",
3337 "NYIAKENGPUACHUEHMONG");
3338
3339 /**
3340 * Constant for the "Wancho" Unicode
3341 * character block (U+1E2C0..U+1E2FF).
3342 * @since 13
3343 */
3344 public static final UnicodeBlock WANCHO =
3345 new UnicodeBlock("WANCHO");
3346
3347 /**
3348 * Constant for the "Ottoman Siyaq Numbers" Unicode
3349 * character block (U+1ED00..U+1ED4F).
3350 * @since 13
3351 */
3352 public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
3353 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS",
3354 "OTTOMAN SIYAQ NUMBERS",
3355 "OTTOMANSIYAQNUMBERS");
3356
3357 /**
3358 * Constant for the "Symbols and Pictographs Extended-A" Unicode
3359 * character block (U+1FA70..U+1FAFF).
3360 * @since 13
3361 */
3362 public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
3363 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A",
3364 "SYMBOLS AND PICTOGRAPHS EXTENDED-A",
3365 "SYMBOLSANDPICTOGRAPHSEXTENDED-A");
3366
3367 /**
3368 * Constant for the "Yezidi" Unicode
3369 * character block (U+10E80..U+10EBF).
3370 * @since 15
3371 */
3372 public static final UnicodeBlock YEZIDI =
3373 new UnicodeBlock("YEZIDI");
3374
3375 /**
3376 * Constant for the "Chorasmian" Unicode
3377 * character block (U+10FB0..U+10FDF).
3378 * @since 15
3379 */
3380 public static final UnicodeBlock CHORASMIAN =
3381 new UnicodeBlock("CHORASMIAN");
3382
3383 /**
3384 * Constant for the "Dives Akuru" Unicode
3385 * character block (U+11900..U+1195F).
3386 * @since 15
3387 */
3388 public static final UnicodeBlock DIVES_AKURU =
3389 new UnicodeBlock("DIVES_AKURU",
3390 "DIVES AKURU",
3391 "DIVESAKURU");
3392
3393 /**
3394 * Constant for the "Lisu Supplement" Unicode
3395 * character block (U+11FB0..U+11FBF).
3396 * @since 15
3397 */
3398 public static final UnicodeBlock LISU_SUPPLEMENT =
3399 new UnicodeBlock("LISU_SUPPLEMENT",
3400 "LISU SUPPLEMENT",
3401 "LISUSUPPLEMENT");
3402
3403 /**
3404 * Constant for the "Khitan Small Script" Unicode
3405 * character block (U+18B00..U+18CFF).
3406 * @since 15
3407 */
3408 public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
3409 new UnicodeBlock("KHITAN_SMALL_SCRIPT",
3410 "KHITAN SMALL SCRIPT",
3411 "KHITANSMALLSCRIPT");
3412
3413 /**
3414 * Constant for the "Tangut Supplement" Unicode
3415 * character block (U+18D00..U+18D7F).
3416 * @since 15
3417 */
3418 public static final UnicodeBlock TANGUT_SUPPLEMENT =
3419 new UnicodeBlock("TANGUT_SUPPLEMENT",
3420 "TANGUT SUPPLEMENT",
3421 "TANGUTSUPPLEMENT");
3422
3423 /**
3424 * Constant for the "Symbols for Legacy Computing" Unicode
3425 * character block (U+1FB00..U+1FBFF).
3426 * @since 15
3427 */
3428 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
3429 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING",
3430 "SYMBOLS FOR LEGACY COMPUTING",
3431 "SYMBOLSFORLEGACYCOMPUTING");
3432
3433 /**
3434 * Constant for the "CJK Unified Ideographs Extension G" Unicode
3435 * character block (U+30000..U+3134F).
3436 * @since 15
3437 */
3438 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
3439 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
3440 "CJK UNIFIED IDEOGRAPHS EXTENSION G",
3441 "CJKUNIFIEDIDEOGRAPHSEXTENSIONG");
3442
3443 /**
3444 * Constant for the "Arabic Extended-B" Unicode
3445 * character block (U+0870..U+089F).
3446 * @since 19
3447 */
3448 public static final UnicodeBlock ARABIC_EXTENDED_B =
3449 new UnicodeBlock("ARABIC_EXTENDED_B",
3450 "ARABIC EXTENDED-B",
3451 "ARABICEXTENDED-B");
3452
3453 /**
3454 * Constant for the "Vithkuqi" Unicode
3455 * character block (U+10570..U+105BF).
3456 * @since 19
3457 */
3458 public static final UnicodeBlock VITHKUQI =
3459 new UnicodeBlock("VITHKUQI");
3460
3461 /**
3462 * Constant for the "Latin Extended-F" Unicode
3463 * character block (U+10780..U+107BF).
3464 * @since 19
3465 */
3466 public static final UnicodeBlock LATIN_EXTENDED_F =
3467 new UnicodeBlock("LATIN_EXTENDED_F",
3468 "LATIN EXTENDED-F",
3469 "LATINEXTENDED-F");
3470
3471 /**
3472 * Constant for the "Old Uyghur" Unicode
3473 * character block (U+10F70..U+10FAF).
3474 * @since 19
3475 */
3476 public static final UnicodeBlock OLD_UYGHUR =
3477 new UnicodeBlock("OLD_UYGHUR",
3478 "OLD UYGHUR",
3479 "OLDUYGHUR");
3480
3481 /**
3482 * Constant for the "Unified Canadian Aboriginal Syllabics Extended-A" Unicode
3483 * character block (U+11AB0..U+11ABF).
3484 * @since 19
3485 */
3486 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A =
3487 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A",
3488 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED-A",
3489 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED-A");
3490
3491 /**
3492 * Constant for the "Cypro-Minoan" Unicode
3493 * character block (U+12F90..U+12FFF).
3494 * @since 19
3495 */
// "Cypro-Minoan" contains no spaces, so its upper-cased name and its
// space-free form are the same string. Pass that alias once, as
// TULU_TIGALARI does, instead of repeating it.
3496 public static final UnicodeBlock CYPRO_MINOAN =
3497 new UnicodeBlock("CYPRO_MINOAN",
3498 "CYPRO-MINOAN");
3500
3501 /**
3502 * Constant for the "Tangsa" Unicode
3503 * character block (U+16A70..U+16ACF).
3504 * @since 19
3505 */
3506 public static final UnicodeBlock TANGSA =
3507 new UnicodeBlock("TANGSA");
3508
3509 /**
3510 * Constant for the "Kana Extended-B" Unicode
3511 * character block (U+1AFF0..U+1AFFF).
3512 * @since 19
3513 */
3514 public static final UnicodeBlock KANA_EXTENDED_B =
3515 new UnicodeBlock("KANA_EXTENDED_B",
3516 "KANA EXTENDED-B",
3517 "KANAEXTENDED-B");
3518
3519 /**
3520 * Constant for the "Znamenny Musical Notation" Unicode
3521 * character block (U+1CF00..U+1CFCF).
3522 * @since 19
3523 */
3524 public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION =
3525 new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION",
3526 "ZNAMENNY MUSICAL NOTATION",
3527 "ZNAMENNYMUSICALNOTATION");
3528
3529 /**
3530 * Constant for the "Latin Extended-G" Unicode
3531 * character block (U+1DF00..U+1DFFF).
3532 * @since 19
3533 */
3534 public static final UnicodeBlock LATIN_EXTENDED_G =
3535 new UnicodeBlock("LATIN_EXTENDED_G",
3536 "LATIN EXTENDED-G",
3537 "LATINEXTENDED-G");
3538
3539 /**
3540 * Constant for the "Toto" Unicode
3541 * character block (U+1E290..U+1E2BF).
3542 * @since 19
3543 */
3544 public static final UnicodeBlock TOTO =
3545 new UnicodeBlock("TOTO");
3546
3547 /**
3548 * Constant for the "Ethiopic Extended-B" Unicode
3549 * character block (U+1E7E0..U+1E7FF).
3550 * @since 19
3551 */
3552 public static final UnicodeBlock ETHIOPIC_EXTENDED_B =
3553 new UnicodeBlock("ETHIOPIC_EXTENDED_B",
3554 "ETHIOPIC EXTENDED-B",
3555 "ETHIOPICEXTENDED-B");
3556
3557 /**
3558 * Constant for the "Arabic Extended-C" Unicode
3559 * character block (U+10EC0..U+10EFF).
3560 * @since 20
3561 */
3562 public static final UnicodeBlock ARABIC_EXTENDED_C =
3563 new UnicodeBlock("ARABIC_EXTENDED_C",
3564 "ARABIC EXTENDED-C",
3565 "ARABICEXTENDED-C");
3566
3567 /**
3568 * Constant for the "Devanagari Extended-A" Unicode
3569 * character block (U+11B00..U+11B5F).
3570 * @since 20
3571 */
3572 public static final UnicodeBlock DEVANAGARI_EXTENDED_A =
3573 new UnicodeBlock("DEVANAGARI_EXTENDED_A",
3574 "DEVANAGARI EXTENDED-A",
3575 "DEVANAGARIEXTENDED-A");
3576
3577 /**
3578 * Constant for the "Kawi" Unicode
3579 * character block (U+11F00..U+11F5F).
3580 * @since 20
3581 */
3582 public static final UnicodeBlock KAWI =
3583 new UnicodeBlock("KAWI");
3584
3585 /**
3586 * Constant for the "Kaktovik Numerals" Unicode
3587 * character block (U+1D2C0..U+1D2DF).
3588 * @since 20
3589 */
3590 public static final UnicodeBlock KAKTOVIK_NUMERALS =
3591 new UnicodeBlock("KAKTOVIK_NUMERALS",
3592 "KAKTOVIK NUMERALS",
3593 "KAKTOVIKNUMERALS");
3594
3595 /**
3596 * Constant for the "Cyrillic Extended-D" Unicode
3597 * character block (U+1E030..U+1E08F).
3598 * @since 20
3599 */
3600 public static final UnicodeBlock CYRILLIC_EXTENDED_D =
3601 new UnicodeBlock("CYRILLIC_EXTENDED_D",
3602 "CYRILLIC EXTENDED-D",
3603 "CYRILLICEXTENDED-D");
3604
3605 /**
3606 * Constant for the "Nag Mundari" Unicode
3607 * character block (U+1E4D0..U+1E4FF).
3608 * @since 20
3609 */
3610 public static final UnicodeBlock NAG_MUNDARI =
3611 new UnicodeBlock("NAG_MUNDARI",
3612 "NAG MUNDARI",
3613 "NAGMUNDARI");
3614
3615 /**
3616 * Constant for the "CJK Unified Ideographs Extension H" Unicode
3617 * character block (U+31350..U+323AF).
3618 * @since 20
3619 */
3620 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H =
3621 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H",
3622 "CJK UNIFIED IDEOGRAPHS EXTENSION H",
3623 "CJKUNIFIEDIDEOGRAPHSEXTENSIONH");
3624
3625 /**
3626 * Constant for the "CJK Unified Ideographs Extension I" Unicode
3627 * character block (U+2EBF0..U+2EE5F).
3628 * @since 22
3629 */
3630 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I =
3631 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I",
3632 "CJK UNIFIED IDEOGRAPHS EXTENSION I",
3633 "CJKUNIFIEDIDEOGRAPHSEXTENSIONI");
3634
3635 /**
3636 * Constant for the "Todhri" Unicode
3637 * character block (U+105C0..U+105FF).
3638 * @since 24
3639 */
3640 public static final UnicodeBlock TODHRI =
3641 new UnicodeBlock("TODHRI");
3642
3643 /**
3644 * Constant for the "Garay" Unicode
3645 * character block (U+10D40..U+10D8F).
3646 * @since 24
3647 */
3648 public static final UnicodeBlock GARAY =
3649 new UnicodeBlock("GARAY");
3650
3651 /**
3652 * Constant for the "Tulu-Tigalari" Unicode
3653 * character block (U+11380..U+113FF).
3654 * @since 24
3655 */
3656 public static final UnicodeBlock TULU_TIGALARI =
3657 new UnicodeBlock("TULU_TIGALARI",
3658 "TULU-TIGALARI");
3659
3660 /**
3661 * Constant for the "Myanmar Extended-C" Unicode
3662 * character block (U+116D0..U+116FF).
3663 * @since 24
3664 */
3665 public static final UnicodeBlock MYANMAR_EXTENDED_C =
3666 new UnicodeBlock("MYANMAR_EXTENDED_C",
3667 "MYANMAR EXTENDED-C",
3668 "MYANMAREXTENDED-C");
3669
3670 /**
3671 * Constant for the "Sunuwar" Unicode
3672 * character block (U+11BC0..U+11BFF).
3673 * @since 24
3674 */
3675 public static final UnicodeBlock SUNUWAR =
3676 new UnicodeBlock("SUNUWAR");
3677
3678 /**
3679 * Constant for the "Egyptian Hieroglyphs Extended-A" Unicode
3680 * character block (U+13460..U+143FF).
3681 * @since 24
3682 */
3683 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS_EXTENDED_A =
3684 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS_EXTENDED_A",
3685 "EGYPTIAN HIEROGLYPHS EXTENDED-A",
3686 "EGYPTIANHIEROGLYPHSEXTENDED-A");
3687
3688 /**
3689 * Constant for the "Gurung Khema" Unicode
3690 * character block (U+16100..U+1613F).
3691 * @since 24
3692 */
3693 public static final UnicodeBlock GURUNG_KHEMA =
3694 new UnicodeBlock("GURUNG_KHEMA",
3695 "GURUNG KHEMA",
3696 "GURUNGKHEMA");
3697
3698 /**
3699 * Constant for the "Kirat Rai" Unicode
3700 * character block (U+16D40..U+16D7F).
3701 * @since 24
3702 */
3703 public static final UnicodeBlock KIRAT_RAI =
3704 new UnicodeBlock("KIRAT_RAI",
3705 "KIRAT RAI",
3706 "KIRATRAI");
3707
3708 /**
3709 * Constant for the "Symbols for Legacy Computing Supplement" Unicode
3710 * character block (U+1CC00..U+1CEBF).
3711 * @since 24
3712 */
3713 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT =
3714 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT",
3715 "SYMBOLS FOR LEGACY COMPUTING SUPPLEMENT",
3716 "SYMBOLSFORLEGACYCOMPUTINGSUPPLEMENT");
3717
3718 /**
3719 * Constant for the "Ol Onal" Unicode
3720 * character block (U+1E5D0..U+1E5FF).
3721 * @since 24
3722 */
3723 public static final UnicodeBlock OL_ONAL =
3724 new UnicodeBlock("OL_ONAL",
3725 "OL ONAL",
3726 "OLONAL");
3727
// Table of range start code points, in strictly ascending order from 0x0000
// to 0x100000. The trailing comment on each entry gives the range that entry
// opens and its Unicode block name. An entry marked "unassigned" opens a gap
// belonging to no block; the gap runs up to the next entry's start.
// NOTE(review): the blocks array declared right after this table appears to
// be index-aligned with it (it holds null at the index of the first
// "unassigned" entry, 0x2FE0). Only the first part of that array is visible
// from here, so confirm both arrays stay the same length when editing.
// NOTE(review): presumably this table is searched to map a code point to its
// block; the lookup code is outside this excerpt, so confirm before relying
// on that.
3728 private static final int[] blockStarts = {
3729 0x0000, // 0000..007F; Basic Latin
3730 0x0080, // 0080..00FF; Latin-1 Supplement
3731 0x0100, // 0100..017F; Latin Extended-A
3732 0x0180, // 0180..024F; Latin Extended-B
3733 0x0250, // 0250..02AF; IPA Extensions
3734 0x02B0, // 02B0..02FF; Spacing Modifier Letters
3735 0x0300, // 0300..036F; Combining Diacritical Marks
3736 0x0370, // 0370..03FF; Greek and Coptic
3737 0x0400, // 0400..04FF; Cyrillic
3738 0x0500, // 0500..052F; Cyrillic Supplement
3739 0x0530, // 0530..058F; Armenian
3740 0x0590, // 0590..05FF; Hebrew
3741 0x0600, // 0600..06FF; Arabic
3742 0x0700, // 0700..074F; Syriac
3743 0x0750, // 0750..077F; Arabic Supplement
3744 0x0780, // 0780..07BF; Thaana
3745 0x07C0, // 07C0..07FF; NKo
3746 0x0800, // 0800..083F; Samaritan
3747 0x0840, // 0840..085F; Mandaic
3748 0x0860, // 0860..086F; Syriac Supplement
3749 0x0870, // 0870..089F; Arabic Extended-B
3750 0x08A0, // 08A0..08FF; Arabic Extended-A
3751 0x0900, // 0900..097F; Devanagari
3752 0x0980, // 0980..09FF; Bengali
3753 0x0A00, // 0A00..0A7F; Gurmukhi
3754 0x0A80, // 0A80..0AFF; Gujarati
3755 0x0B00, // 0B00..0B7F; Oriya
3756 0x0B80, // 0B80..0BFF; Tamil
3757 0x0C00, // 0C00..0C7F; Telugu
3758 0x0C80, // 0C80..0CFF; Kannada
3759 0x0D00, // 0D00..0D7F; Malayalam
3760 0x0D80, // 0D80..0DFF; Sinhala
3761 0x0E00, // 0E00..0E7F; Thai
3762 0x0E80, // 0E80..0EFF; Lao
3763 0x0F00, // 0F00..0FFF; Tibetan
3764 0x1000, // 1000..109F; Myanmar
3765 0x10A0, // 10A0..10FF; Georgian
3766 0x1100, // 1100..11FF; Hangul Jamo
3767 0x1200, // 1200..137F; Ethiopic
3768 0x1380, // 1380..139F; Ethiopic Supplement
3769 0x13A0, // 13A0..13FF; Cherokee
3770 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics
3771 0x1680, // 1680..169F; Ogham
3772 0x16A0, // 16A0..16FF; Runic
3773 0x1700, // 1700..171F; Tagalog
3774 0x1720, // 1720..173F; Hanunoo
3775 0x1740, // 1740..175F; Buhid
3776 0x1760, // 1760..177F; Tagbanwa
3777 0x1780, // 1780..17FF; Khmer
3778 0x1800, // 1800..18AF; Mongolian
3779 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
3780 0x1900, // 1900..194F; Limbu
3781 0x1950, // 1950..197F; Tai Le
3782 0x1980, // 1980..19DF; New Tai Lue
3783 0x19E0, // 19E0..19FF; Khmer Symbols
3784 0x1A00, // 1A00..1A1F; Buginese
3785 0x1A20, // 1A20..1AAF; Tai Tham
3786 0x1AB0, // 1AB0..1AFF; Combining Diacritical Marks Extended
3787 0x1B00, // 1B00..1B7F; Balinese
3788 0x1B80, // 1B80..1BBF; Sundanese
3789 0x1BC0, // 1BC0..1BFF; Batak
3790 0x1C00, // 1C00..1C4F; Lepcha
3791 0x1C50, // 1C50..1C7F; Ol Chiki
3792 0x1C80, // 1C80..1C8F; Cyrillic Extended-C
3793 0x1C90, // 1C90..1CBF; Georgian Extended
3794 0x1CC0, // 1CC0..1CCF; Sundanese Supplement
3795 0x1CD0, // 1CD0..1CFF; Vedic Extensions
3796 0x1D00, // 1D00..1D7F; Phonetic Extensions
3797 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement
3798 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement
3799 0x1E00, // 1E00..1EFF; Latin Extended Additional
3800 0x1F00, // 1F00..1FFF; Greek Extended
3801 0x2000, // 2000..206F; General Punctuation
3802 0x2070, // 2070..209F; Superscripts and Subscripts
3803 0x20A0, // 20A0..20CF; Currency Symbols
3804 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols
3805 0x2100, // 2100..214F; Letterlike Symbols
3806 0x2150, // 2150..218F; Number Forms
3807 0x2190, // 2190..21FF; Arrows
3808 0x2200, // 2200..22FF; Mathematical Operators
3809 0x2300, // 2300..23FF; Miscellaneous Technical
3810 0x2400, // 2400..243F; Control Pictures
3811 0x2440, // 2440..245F; Optical Character Recognition
3812 0x2460, // 2460..24FF; Enclosed Alphanumerics
3813 0x2500, // 2500..257F; Box Drawing
3814 0x2580, // 2580..259F; Block Elements
3815 0x25A0, // 25A0..25FF; Geometric Shapes
3816 0x2600, // 2600..26FF; Miscellaneous Symbols
3817 0x2700, // 2700..27BF; Dingbats
3818 0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A
3819 0x27F0, // 27F0..27FF; Supplemental Arrows-A
3820 0x2800, // 2800..28FF; Braille Patterns
3821 0x2900, // 2900..297F; Supplemental Arrows-B
3822 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B
3823 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators
3824 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows
3825 0x2C00, // 2C00..2C5F; Glagolitic
3826 0x2C60, // 2C60..2C7F; Latin Extended-C
3827 0x2C80, // 2C80..2CFF; Coptic
3828 0x2D00, // 2D00..2D2F; Georgian Supplement
3829 0x2D30, // 2D30..2D7F; Tifinagh
3830 0x2D80, // 2D80..2DDF; Ethiopic Extended
3831 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A
3832 0x2E00, // 2E00..2E7F; Supplemental Punctuation
3833 0x2E80, // 2E80..2EFF; CJK Radicals Supplement
3834 0x2F00, // 2F00..2FDF; Kangxi Radicals
3835 0x2FE0, // unassigned
3836 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters
3837 0x3000, // 3000..303F; CJK Symbols and Punctuation
3838 0x3040, // 3040..309F; Hiragana
3839 0x30A0, // 30A0..30FF; Katakana
3840 0x3100, // 3100..312F; Bopomofo
3841 0x3130, // 3130..318F; Hangul Compatibility Jamo
3842 0x3190, // 3190..319F; Kanbun
3843 0x31A0, // 31A0..31BF; Bopomofo Extended
3844 0x31C0, // 31C0..31EF; CJK Strokes
3845 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions
3846 0x3200, // 3200..32FF; Enclosed CJK Letters and Months
3847 0x3300, // 3300..33FF; CJK Compatibility
3848 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A
3849 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols
3850 0x4E00, // 4E00..9FFF; CJK Unified Ideographs
3851 0xA000, // A000..A48F; Yi Syllables
3852 0xA490, // A490..A4CF; Yi Radicals
3853 0xA4D0, // A4D0..A4FF; Lisu
3854 0xA500, // A500..A63F; Vai
3855 0xA640, // A640..A69F; Cyrillic Extended-B
3856 0xA6A0, // A6A0..A6FF; Bamum
3857 0xA700, // A700..A71F; Modifier Tone Letters
3858 0xA720, // A720..A7FF; Latin Extended-D
3859 0xA800, // A800..A82F; Syloti Nagri
3860 0xA830, // A830..A83F; Common Indic Number Forms
3861 0xA840, // A840..A87F; Phags-pa
3862 0xA880, // A880..A8DF; Saurashtra
3863 0xA8E0, // A8E0..A8FF; Devanagari Extended
3864 0xA900, // A900..A92F; Kayah Li
3865 0xA930, // A930..A95F; Rejang
3866 0xA960, // A960..A97F; Hangul Jamo Extended-A
3867 0xA980, // A980..A9DF; Javanese
3868 0xA9E0, // A9E0..A9FF; Myanmar Extended-B
3869 0xAA00, // AA00..AA5F; Cham
3870 0xAA60, // AA60..AA7F; Myanmar Extended-A
3871 0xAA80, // AA80..AADF; Tai Viet
3872 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions
3873 0xAB00, // AB00..AB2F; Ethiopic Extended-A
3874 0xAB30, // AB30..AB6F; Latin Extended-E
3875 0xAB70, // AB70..ABBF; Cherokee Supplement
3876 0xABC0, // ABC0..ABFF; Meetei Mayek
3877 0xAC00, // AC00..D7AF; Hangul Syllables
3878 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B
3879 0xD800, // D800..DB7F; High Surrogates
3880 0xDB80, // DB80..DBFF; High Private Use Surrogates
3881 0xDC00, // DC00..DFFF; Low Surrogates
3882 0xE000, // E000..F8FF; Private Use Area
3883 0xF900, // F900..FAFF; CJK Compatibility Ideographs
3884 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms
3885 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A
3886 0xFE00, // FE00..FE0F; Variation Selectors
3887 0xFE10, // FE10..FE1F; Vertical Forms
3888 0xFE20, // FE20..FE2F; Combining Half Marks
3889 0xFE30, // FE30..FE4F; CJK Compatibility Forms
3890 0xFE50, // FE50..FE6F; Small Form Variants
3891 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B
3892 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms
3893 0xFFF0, // FFF0..FFFF; Specials
3894 0x10000, // 10000..1007F; Linear B Syllabary
3895 0x10080, // 10080..100FF; Linear B Ideograms
3896 0x10100, // 10100..1013F; Aegean Numbers
3897 0x10140, // 10140..1018F; Ancient Greek Numbers
3898 0x10190, // 10190..101CF; Ancient Symbols
3899 0x101D0, // 101D0..101FF; Phaistos Disc
3900 0x10200, // unassigned
3901 0x10280, // 10280..1029F; Lycian
3902 0x102A0, // 102A0..102DF; Carian
3903 0x102E0, // 102E0..102FF; Coptic Epact Numbers
3904 0x10300, // 10300..1032F; Old Italic
3905 0x10330, // 10330..1034F; Gothic
3906 0x10350, // 10350..1037F; Old Permic
3907 0x10380, // 10380..1039F; Ugaritic
3908 0x103A0, // 103A0..103DF; Old Persian
3909 0x103E0, // unassigned
3910 0x10400, // 10400..1044F; Deseret
3911 0x10450, // 10450..1047F; Shavian
3912 0x10480, // 10480..104AF; Osmanya
3913 0x104B0, // 104B0..104FF; Osage
3914 0x10500, // 10500..1052F; Elbasan
3915 0x10530, // 10530..1056F; Caucasian Albanian
3916 0x10570, // 10570..105BF; Vithkuqi
3917 0x105C0, // 105C0..105FF; Todhri
3918 0x10600, // 10600..1077F; Linear A
3919 0x10780, // 10780..107BF; Latin Extended-F
3920 0x107C0, // unassigned
3921 0x10800, // 10800..1083F; Cypriot Syllabary
3922 0x10840, // 10840..1085F; Imperial Aramaic
3923 0x10860, // 10860..1087F; Palmyrene
3924 0x10880, // 10880..108AF; Nabataean
3925 0x108B0, // unassigned
3926 0x108E0, // 108E0..108FF; Hatran
3927 0x10900, // 10900..1091F; Phoenician
3928 0x10920, // 10920..1093F; Lydian
3929 0x10940, // unassigned
3930 0x10980, // 10980..1099F; Meroitic Hieroglyphs
3931 0x109A0, // 109A0..109FF; Meroitic Cursive
3932 0x10A00, // 10A00..10A5F; Kharoshthi
3933 0x10A60, // 10A60..10A7F; Old South Arabian
3934 0x10A80, // 10A80..10A9F; Old North Arabian
3935 0x10AA0, // unassigned
3936 0x10AC0, // 10AC0..10AFF; Manichaean
3937 0x10B00, // 10B00..10B3F; Avestan
3938 0x10B40, // 10B40..10B5F; Inscriptional Parthian
3939 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi
3940 0x10B80, // 10B80..10BAF; Psalter Pahlavi
3941 0x10BB0, // unassigned
3942 0x10C00, // 10C00..10C4F; Old Turkic
3943 0x10C50, // unassigned
3944 0x10C80, // 10C80..10CFF; Old Hungarian
3945 0x10D00, // 10D00..10D3F; Hanifi Rohingya
3946 0x10D40, // 10D40..10D8F; Garay
3947 0x10D90, // unassigned
3948 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols
3949 0x10E80, // 10E80..10EBF; Yezidi
3950 0x10EC0, // 10EC0..10EFF; Arabic Extended-C
3951 0x10F00, // 10F00..10F2F; Old Sogdian
3952 0x10F30, // 10F30..10F6F; Sogdian
3953 0x10F70, // 10F70..10FAF; Old Uyghur
3954 0x10FB0, // 10FB0..10FDF; Chorasmian
3955 0x10FE0, // 10FE0..10FFF; Elymaic
3956 0x11000, // 11000..1107F; Brahmi
3957 0x11080, // 11080..110CF; Kaithi
3958 0x110D0, // 110D0..110FF; Sora Sompeng
3959 0x11100, // 11100..1114F; Chakma
3960 0x11150, // 11150..1117F; Mahajani
3961 0x11180, // 11180..111DF; Sharada
3962 0x111E0, // 111E0..111FF; Sinhala Archaic Numbers
3963 0x11200, // 11200..1124F; Khojki
3964 0x11250, // unassigned
3965 0x11280, // 11280..112AF; Multani
3966 0x112B0, // 112B0..112FF; Khudawadi
3967 0x11300, // 11300..1137F; Grantha
3968 0x11380, // 11380..113FF; Tulu-Tigalari
3969 0x11400, // 11400..1147F; Newa
3970 0x11480, // 11480..114DF; Tirhuta
3971 0x114E0, // unassigned
3972 0x11580, // 11580..115FF; Siddham
3973 0x11600, // 11600..1165F; Modi
3974 0x11660, // 11660..1167F; Mongolian Supplement
3975 0x11680, // 11680..116CF; Takri
3976 0x116D0, // 116D0..116FF; Myanmar Extended-C
3977 0x11700, // 11700..1174F; Ahom
3978 0x11750, // unassigned
3979 0x11800, // 11800..1184F; Dogra
3980 0x11850, // unassigned
3981 0x118A0, // 118A0..118FF; Warang Citi
3982 0x11900, // 11900..1195F; Dives Akuru
3983 0x11960, // unassigned
3984 0x119A0, // 119A0..119FF; Nandinagari
3985 0x11A00, // 11A00..11A4F; Zanabazar Square
3986 0x11A50, // 11A50..11AAF; Soyombo
3987 0x11AB0, // 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
3988 0x11AC0, // 11AC0..11AFF; Pau Cin Hau
3989 0x11B00, // 11B00..11B5F; Devanagari Extended-A
3990 0x11B60, // unassigned
3991 0x11BC0, // 11BC0..11BFF; Sunuwar
3992 0x11C00, // 11C00..11C6F; Bhaiksuki
3993 0x11C70, // 11C70..11CBF; Marchen
3994 0x11CC0, // unassigned
3995 0x11D00, // 11D00..11D5F; Masaram Gondi
3996 0x11D60, // 11D60..11DAF; Gunjala Gondi
3997 0x11DB0, // unassigned
3998 0x11EE0, // 11EE0..11EFF; Makasar
3999 0x11F00, // 11F00..11F5F; Kawi
4000 0x11F60, // unassigned
4001 0x11FB0, // 11FB0..11FBF; Lisu Supplement
4002 0x11FC0, // 11FC0..11FFF; Tamil Supplement
4003 0x12000, // 12000..123FF; Cuneiform
4004 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation
4005 0x12480, // 12480..1254F; Early Dynastic Cuneiform
4006 0x12550, // unassigned
4007 0x12F90, // 12F90..12FFF; Cypro-Minoan
4008 0x13000, // 13000..1342F; Egyptian Hieroglyphs
4009 0x13430, // 13430..1345F; Egyptian Hieroglyph Format Controls
4010 0x13460, // 13460..143FF; Egyptian Hieroglyphs Extended-A
4011 0x14400, // 14400..1467F; Anatolian Hieroglyphs
4012 0x14680, // unassigned
4013 0x16100, // 16100..1613F; Gurung Khema
4014 0x16140, // unassigned
4015 0x16800, // 16800..16A3F; Bamum Supplement
4016 0x16A40, // 16A40..16A6F; Mro
4017 0x16A70, // 16A70..16ACF; Tangsa
4018 0x16AD0, // 16AD0..16AFF; Bassa Vah
4019 0x16B00, // 16B00..16B8F; Pahawh Hmong
4020 0x16B90, // unassigned
4021 0x16D40, // 16D40..16D7F; Kirat Rai
4022 0x16D80, // unassigned
4023 0x16E40, // 16E40..16E9F; Medefaidrin
4024 0x16EA0, // unassigned
4025 0x16F00, // 16F00..16F9F; Miao
4026 0x16FA0, // unassigned
4027 0x16FE0, // 16FE0..16FFF; Ideographic Symbols and Punctuation
4028 0x17000, // 17000..187FF; Tangut
4029 0x18800, // 18800..18AFF; Tangut Components
4030 0x18B00, // 18B00..18CFF; Khitan Small Script
4031 0x18D00, // 18D00..18D7F; Tangut Supplement
4032 0x18D80, // unassigned
4033 0x1AFF0, // 1AFF0..1AFFF; Kana Extended-B
4034 0x1B000, // 1B000..1B0FF; Kana Supplement
4035 0x1B100, // 1B100..1B12F; Kana Extended-A
4036 0x1B130, // 1B130..1B16F; Small Kana Extension
4037 0x1B170, // 1B170..1B2FF; Nushu
4038 0x1B300, // unassigned
4039 0x1BC00, // 1BC00..1BC9F; Duployan
4040 0x1BCA0, // 1BCA0..1BCAF; Shorthand Format Controls
4041 0x1BCB0, // unassigned
4042 0x1CC00, // 1CC00..1CEBF; Symbols for Legacy Computing Supplement
4043 0x1CEC0, // unassigned
4044 0x1CF00, // 1CF00..1CFCF; Znamenny Musical Notation
4045 0x1CFD0, // unassigned
4046 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols
4047 0x1D100, // 1D100..1D1FF; Musical Symbols
4048 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation
4049 0x1D250, // unassigned
4050 0x1D2C0, // 1D2C0..1D2DF; Kaktovik Numerals
4051 0x1D2E0, // 1D2E0..1D2FF; Mayan Numerals
4052 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols
4053 0x1D360, // 1D360..1D37F; Counting Rod Numerals
4054 0x1D380, // unassigned
4055 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols
4056 0x1D800, // 1D800..1DAAF; Sutton SignWriting
4057 0x1DAB0, // unassigned
4058 0x1DF00, // 1DF00..1DFFF; Latin Extended-G
4059 0x1E000, // 1E000..1E02F; Glagolitic Supplement
4060 0x1E030, // 1E030..1E08F; Cyrillic Extended-D
4061 0x1E090, // unassigned
4062 0x1E100, // 1E100..1E14F; Nyiakeng Puachue Hmong
4063 0x1E150, // unassigned
4064 0x1E290, // 1E290..1E2BF; Toto
4065 0x1E2C0, // 1E2C0..1E2FF; Wancho
4066 0x1E300, // unassigned
4067 0x1E4D0, // 1E4D0..1E4FF; Nag Mundari
4068 0x1E500, // unassigned
4069 0x1E5D0, // 1E5D0..1E5FF; Ol Onal
4070 0x1E600, // unassigned
4071 0x1E7E0, // 1E7E0..1E7FF; Ethiopic Extended-B
4072 0x1E800, // 1E800..1E8DF; Mende Kikakui
4073 0x1E8E0, // unassigned
4074 0x1E900, // 1E900..1E95F; Adlam
4075 0x1E960, // unassigned
4076 0x1EC70, // 1EC70..1ECBF; Indic Siyaq Numbers
4077 0x1ECC0, // unassigned
4078 0x1ED00, // 1ED00..1ED4F; Ottoman Siyaq Numbers
4079 0x1ED50, // unassigned
4080 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
4081 0x1EF00, // unassigned
4082 0x1F000, // 1F000..1F02F; Mahjong Tiles
4083 0x1F030, // 1F030..1F09F; Domino Tiles
4084 0x1F0A0, // 1F0A0..1F0FF; Playing Cards
4085 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement
4086 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement
4087 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
4088 0x1F600, // 1F600..1F64F; Emoticons
4089 0x1F650, // 1F650..1F67F; Ornamental Dingbats
4090 0x1F680, // 1F680..1F6FF; Transport and Map Symbols
4091 0x1F700, // 1F700..1F77F; Alchemical Symbols
4092 0x1F780, // 1F780..1F7FF; Geometric Shapes Extended
4093 0x1F800, // 1F800..1F8FF; Supplemental Arrows-C
4094 0x1F900, // 1F900..1F9FF; Supplemental Symbols and Pictographs
4095 0x1FA00, // 1FA00..1FA6F; Chess Symbols
4096 0x1FA70, // 1FA70..1FAFF; Symbols and Pictographs Extended-A
4097 0x1FB00, // 1FB00..1FBFF; Symbols for Legacy Computing
4098 0x1FC00, // unassigned
4099 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B
4100 0x2A6E0, // unassigned
4101 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C
4102 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D
4103 0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E
4104 0x2CEB0, // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
4105 0x2EBF0, // 2EBF0..2EE5F; CJK Unified Ideographs Extension I
4106 0x2EE60, // unassigned
4107 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
4108 0x2FA20, // unassigned
4109 0x30000, // 30000..3134F; CJK Unified Ideographs Extension G
4110 0x31350, // 31350..323AF; CJK Unified Ideographs Extension H
4111 0x323B0, // unassigned
4112 0xE0000, // E0000..E007F; Tags
4113 0xE0080, // unassigned
4114 0xE0100, // E0100..E01EF; Variation Selectors Supplement
4115 0xE01F0, // unassigned
4116 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A
4117 0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
4118 };
4119
// Lookup table consulted by of(int): after the binary search over
// blockStarts selects an index, the element at that same index here is
// returned. Entries are therefore expected to line up one-to-one, in
// order, with blockStarts. A null entry stands for a range that
// blockStarts labels as unassigned, which is why of(int) can return null.
// Do not reorder, insert or delete entries without making the matching
// change in blockStarts.
4120 private static final UnicodeBlock[] blocks = {
4121 BASIC_LATIN,
4122 LATIN_1_SUPPLEMENT,
4123 LATIN_EXTENDED_A,
4124 LATIN_EXTENDED_B,
4125 IPA_EXTENSIONS,
4126 SPACING_MODIFIER_LETTERS,
4127 COMBINING_DIACRITICAL_MARKS,
4128 GREEK,
4129 CYRILLIC,
4130 CYRILLIC_SUPPLEMENTARY,
4131 ARMENIAN,
4132 HEBREW,
4133 ARABIC,
4134 SYRIAC,
4135 ARABIC_SUPPLEMENT,
4136 THAANA,
4137 NKO,
4138 SAMARITAN,
4139 MANDAIC,
4140 SYRIAC_SUPPLEMENT,
4141 ARABIC_EXTENDED_B,
4142 ARABIC_EXTENDED_A,
4143 DEVANAGARI,
4144 BENGALI,
4145 GURMUKHI,
4146 GUJARATI,
4147 ORIYA,
4148 TAMIL,
4149 TELUGU,
4150 KANNADA,
4151 MALAYALAM,
4152 SINHALA,
4153 THAI,
4154 LAO,
4155 TIBETAN,
4156 MYANMAR,
4157 GEORGIAN,
4158 HANGUL_JAMO,
4159 ETHIOPIC,
4160 ETHIOPIC_SUPPLEMENT,
4161 CHEROKEE,
4162 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
4163 OGHAM,
4164 RUNIC,
4165 TAGALOG,
4166 HANUNOO,
4167 BUHID,
4168 TAGBANWA,
4169 KHMER,
4170 MONGOLIAN,
4171 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
4172 LIMBU,
4173 TAI_LE,
4174 NEW_TAI_LUE,
4175 KHMER_SYMBOLS,
4176 BUGINESE,
4177 TAI_THAM,
4178 COMBINING_DIACRITICAL_MARKS_EXTENDED,
4179 BALINESE,
4180 SUNDANESE,
4181 BATAK,
4182 LEPCHA,
4183 OL_CHIKI,
4184 CYRILLIC_EXTENDED_C,
4185 GEORGIAN_EXTENDED,
4186 SUNDANESE_SUPPLEMENT,
4187 VEDIC_EXTENSIONS,
4188 PHONETIC_EXTENSIONS,
4189 PHONETIC_EXTENSIONS_SUPPLEMENT,
4190 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
4191 LATIN_EXTENDED_ADDITIONAL,
4192 GREEK_EXTENDED,
4193 GENERAL_PUNCTUATION,
4194 SUPERSCRIPTS_AND_SUBSCRIPTS,
4195 CURRENCY_SYMBOLS,
4196 COMBINING_MARKS_FOR_SYMBOLS,
4197 LETTERLIKE_SYMBOLS,
4198 NUMBER_FORMS,
4199 ARROWS,
4200 MATHEMATICAL_OPERATORS,
4201 MISCELLANEOUS_TECHNICAL,
4202 CONTROL_PICTURES,
4203 OPTICAL_CHARACTER_RECOGNITION,
4204 ENCLOSED_ALPHANUMERICS,
4205 BOX_DRAWING,
4206 BLOCK_ELEMENTS,
4207 GEOMETRIC_SHAPES,
4208 MISCELLANEOUS_SYMBOLS,
4209 DINGBATS,
4210 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
4211 SUPPLEMENTAL_ARROWS_A,
4212 BRAILLE_PATTERNS,
4213 SUPPLEMENTAL_ARROWS_B,
4214 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
4215 SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
4216 MISCELLANEOUS_SYMBOLS_AND_ARROWS,
4217 GLAGOLITIC,
4218 LATIN_EXTENDED_C,
4219 COPTIC,
4220 GEORGIAN_SUPPLEMENT,
4221 TIFINAGH,
4222 ETHIOPIC_EXTENDED,
4223 CYRILLIC_EXTENDED_A,
4224 SUPPLEMENTAL_PUNCTUATION,
4225 CJK_RADICALS_SUPPLEMENT,
4226 KANGXI_RADICALS,
4227 null,
4228 IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
4229 CJK_SYMBOLS_AND_PUNCTUATION,
4230 HIRAGANA,
4231 KATAKANA,
4232 BOPOMOFO,
4233 HANGUL_COMPATIBILITY_JAMO,
4234 KANBUN,
4235 BOPOMOFO_EXTENDED,
4236 CJK_STROKES,
4237 KATAKANA_PHONETIC_EXTENSIONS,
4238 ENCLOSED_CJK_LETTERS_AND_MONTHS,
4239 CJK_COMPATIBILITY,
4240 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
4241 YIJING_HEXAGRAM_SYMBOLS,
4242 CJK_UNIFIED_IDEOGRAPHS,
4243 YI_SYLLABLES,
4244 YI_RADICALS,
4245 LISU,
4246 VAI,
4247 CYRILLIC_EXTENDED_B,
4248 BAMUM,
4249 MODIFIER_TONE_LETTERS,
4250 LATIN_EXTENDED_D,
4251 SYLOTI_NAGRI,
4252 COMMON_INDIC_NUMBER_FORMS,
4253 PHAGS_PA,
4254 SAURASHTRA,
4255 DEVANAGARI_EXTENDED,
4256 KAYAH_LI,
4257 REJANG,
4258 HANGUL_JAMO_EXTENDED_A,
4259 JAVANESE,
4260 MYANMAR_EXTENDED_B,
4261 CHAM,
4262 MYANMAR_EXTENDED_A,
4263 TAI_VIET,
4264 MEETEI_MAYEK_EXTENSIONS,
4265 ETHIOPIC_EXTENDED_A,
4266 LATIN_EXTENDED_E,
4267 CHEROKEE_SUPPLEMENT,
4268 MEETEI_MAYEK,
4269 HANGUL_SYLLABLES,
4270 HANGUL_JAMO_EXTENDED_B,
4271 HIGH_SURROGATES,
4272 HIGH_PRIVATE_USE_SURROGATES,
4273 LOW_SURROGATES,
4274 PRIVATE_USE_AREA,
4275 CJK_COMPATIBILITY_IDEOGRAPHS,
4276 ALPHABETIC_PRESENTATION_FORMS,
4277 ARABIC_PRESENTATION_FORMS_A,
4278 VARIATION_SELECTORS,
4279 VERTICAL_FORMS,
4280 COMBINING_HALF_MARKS,
4281 CJK_COMPATIBILITY_FORMS,
4282 SMALL_FORM_VARIANTS,
4283 ARABIC_PRESENTATION_FORMS_B,
4284 HALFWIDTH_AND_FULLWIDTH_FORMS,
4285 SPECIALS,
4286 LINEAR_B_SYLLABARY,
4287 LINEAR_B_IDEOGRAMS,
4288 AEGEAN_NUMBERS,
4289 ANCIENT_GREEK_NUMBERS,
4290 ANCIENT_SYMBOLS,
4291 PHAISTOS_DISC,
4292 null,
4293 LYCIAN,
4294 CARIAN,
4295 COPTIC_EPACT_NUMBERS,
4296 OLD_ITALIC,
4297 GOTHIC,
4298 OLD_PERMIC,
4299 UGARITIC,
4300 OLD_PERSIAN,
4301 null,
4302 DESERET,
4303 SHAVIAN,
4304 OSMANYA,
4305 OSAGE,
4306 ELBASAN,
4307 CAUCASIAN_ALBANIAN,
4308 VITHKUQI,
4309 TODHRI,
4310 LINEAR_A,
4311 LATIN_EXTENDED_F,
4312 null,
4313 CYPRIOT_SYLLABARY,
4314 IMPERIAL_ARAMAIC,
4315 PALMYRENE,
4316 NABATAEAN,
4317 null,
4318 HATRAN,
4319 PHOENICIAN,
4320 LYDIAN,
4321 null,
4322 MEROITIC_HIEROGLYPHS,
4323 MEROITIC_CURSIVE,
4324 KHAROSHTHI,
4325 OLD_SOUTH_ARABIAN,
4326 OLD_NORTH_ARABIAN,
4327 null,
4328 MANICHAEAN,
4329 AVESTAN,
4330 INSCRIPTIONAL_PARTHIAN,
4331 INSCRIPTIONAL_PAHLAVI,
4332 PSALTER_PAHLAVI,
4333 null,
4334 OLD_TURKIC,
4335 null,
4336 OLD_HUNGARIAN,
4337 HANIFI_ROHINGYA,
4338 GARAY,
4339 null,
4340 RUMI_NUMERAL_SYMBOLS,
4341 YEZIDI,
4342 ARABIC_EXTENDED_C,
4343 OLD_SOGDIAN,
4344 SOGDIAN,
4345 OLD_UYGHUR,
4346 CHORASMIAN,
4347 ELYMAIC,
4348 BRAHMI,
4349 KAITHI,
4350 SORA_SOMPENG,
4351 CHAKMA,
4352 MAHAJANI,
4353 SHARADA,
4354 SINHALA_ARCHAIC_NUMBERS,
4355 KHOJKI,
4356 null,
4357 MULTANI,
4358 KHUDAWADI,
4359 GRANTHA,
4360 TULU_TIGALARI,
4361 NEWA,
4362 TIRHUTA,
4363 null,
4364 SIDDHAM,
4365 MODI,
4366 MONGOLIAN_SUPPLEMENT,
4367 TAKRI,
4368 MYANMAR_EXTENDED_C,
4369 AHOM,
4370 null,
4371 DOGRA,
4372 null,
4373 WARANG_CITI,
4374 DIVES_AKURU,
4375 null,
4376 NANDINAGARI,
4377 ZANABAZAR_SQUARE,
4378 SOYOMBO,
4379 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A,
4380 PAU_CIN_HAU,
4381 DEVANAGARI_EXTENDED_A,
4382 null,
4383 SUNUWAR,
4384 BHAIKSUKI,
4385 MARCHEN,
4386 null,
4387 MASARAM_GONDI,
4388 GUNJALA_GONDI,
4389 null,
4390 MAKASAR,
4391 KAWI,
4392 null,
4393 LISU_SUPPLEMENT,
4394 TAMIL_SUPPLEMENT,
4395 CUNEIFORM,
4396 CUNEIFORM_NUMBERS_AND_PUNCTUATION,
4397 EARLY_DYNASTIC_CUNEIFORM,
4398 null,
4399 CYPRO_MINOAN,
4400 EGYPTIAN_HIEROGLYPHS,
4401 EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS,
4402 EGYPTIAN_HIEROGLYPHS_EXTENDED_A,
4403 ANATOLIAN_HIEROGLYPHS,
4404 null,
4405 GURUNG_KHEMA,
4406 null,
4407 BAMUM_SUPPLEMENT,
4408 MRO,
4409 TANGSA,
4410 BASSA_VAH,
4411 PAHAWH_HMONG,
4412 null,
4413 KIRAT_RAI,
4414 null,
4415 MEDEFAIDRIN,
4416 null,
4417 MIAO,
4418 null,
4419 IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
4420 TANGUT,
4421 TANGUT_COMPONENTS,
4422 KHITAN_SMALL_SCRIPT,
4423 TANGUT_SUPPLEMENT,
4424 null,
4425 KANA_EXTENDED_B,
4426 KANA_SUPPLEMENT,
4427 KANA_EXTENDED_A,
4428 SMALL_KANA_EXTENSION,
4429 NUSHU,
4430 null,
4431 DUPLOYAN,
4432 SHORTHAND_FORMAT_CONTROLS,
4433 null,
4434 SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT,
4435 null,
4436 ZNAMENNY_MUSICAL_NOTATION,
4437 null,
4438 BYZANTINE_MUSICAL_SYMBOLS,
4439 MUSICAL_SYMBOLS,
4440 ANCIENT_GREEK_MUSICAL_NOTATION,
4441 null,
4442 KAKTOVIK_NUMERALS,
4443 MAYAN_NUMERALS,
4444 TAI_XUAN_JING_SYMBOLS,
4445 COUNTING_ROD_NUMERALS,
4446 null,
4447 MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
4448 SUTTON_SIGNWRITING,
4449 null,
4450 LATIN_EXTENDED_G,
4451 GLAGOLITIC_SUPPLEMENT,
4452 CYRILLIC_EXTENDED_D,
4453 null,
4454 NYIAKENG_PUACHUE_HMONG,
4455 null,
4456 TOTO,
4457 WANCHO,
4458 null,
4459 NAG_MUNDARI,
4460 null,
4461 OL_ONAL,
4462 null,
4463 ETHIOPIC_EXTENDED_B,
4464 MENDE_KIKAKUI,
4465 null,
4466 ADLAM,
4467 null,
4468 INDIC_SIYAQ_NUMBERS,
4469 null,
4470 OTTOMAN_SIYAQ_NUMBERS,
4471 null,
4472 ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
4473 null,
4474 MAHJONG_TILES,
4475 DOMINO_TILES,
4476 PLAYING_CARDS,
4477 ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
4478 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
4479 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
4480 EMOTICONS,
4481 ORNAMENTAL_DINGBATS,
4482 TRANSPORT_AND_MAP_SYMBOLS,
4483 ALCHEMICAL_SYMBOLS,
4484 GEOMETRIC_SHAPES_EXTENDED,
4485 SUPPLEMENTAL_ARROWS_C,
4486 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
4487 CHESS_SYMBOLS,
4488 SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A,
4489 SYMBOLS_FOR_LEGACY_COMPUTING,
4490 null,
4491 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
4492 null,
4493 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
4494 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
4495 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
4496 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
4497 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I,
4498 null,
4499 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
4500 null,
4501 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G,
4502 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H,
4503 null,
4504 TAGS,
4505 null,
4506 VARIATION_SELECTORS_SUPPLEMENT,
4507 null,
4508 SUPPLEMENTARY_PRIVATE_USE_AREA_A,
4509 SUPPLEMENTARY_PRIVATE_USE_AREA_B,
4510 };
4511
4512
4513 /**
4514 * Returns the object representing the Unicode block containing the
4515 * given character, or {@code null} if the character is not a
4516 * member of a defined block.
4517 *
4518 * <p><b>Note:</b> This method cannot handle
4519 * <a href="Character.html#supplementary"> supplementary
4520 * characters</a>. To support all Unicode characters, including
4521 * supplementary characters, use the {@link #of(int)} method.
4522 *
4523 * @param c The character in question
4524 * @return The {@code UnicodeBlock} instance representing the
4525 * Unicode block of which this character is a member, or
4526 * {@code null} if the character is not a member of any
4527 * Unicode block
4528 */
4529 public static UnicodeBlock of(char c) {
4530 return of((int)c);
4531 }
4532
4533 /**
4534 * Returns the object representing the Unicode block
4535 * containing the given character (Unicode code point), or
4536 * {@code null} if the character is not a member of a
4537 * defined block.
4538 *
4539 * @param codePoint the character (Unicode code point) in question.
4540 * @return The {@code UnicodeBlock} instance representing the
4541 * Unicode block of which this character is a member, or
4542 * {@code null} if the character is not a member of any
4543 * Unicode block
4544 * @throws IllegalArgumentException if the specified
4545 * {@code codePoint} is an invalid Unicode code point.
4546 * @see Character#isValidCodePoint(int)
4547 * @since 1.5
4548 */
4549 public static UnicodeBlock of(int codePoint) {
4550 if (!isValidCodePoint(codePoint)) {
4551 throw new IllegalArgumentException(
4552 String.format("Not a valid Unicode code point: 0x%X", codePoint));
4553 }
4554
4555 int top, bottom, current;
4556 bottom = 0;
4557 top = blockStarts.length;
4558 current = top/2;
4559
4560 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
4561 while (top - bottom > 1) {
4562 if (codePoint >= blockStarts[current]) {
4563 bottom = current;
4564 } else {
4565 top = current;
4566 }
4567 current = (top + bottom) / 2;
4568 }
4569 return blocks[current];
4570 }
4571
4572 /**
4573 * Returns the UnicodeBlock with the given name. Block
4574 * names are determined by The Unicode Standard. The file
4575 * {@code Blocks.txt} defines blocks for a particular
4576 * version of the standard. The {@link Character} class specifies
4577 * the version of the standard that it supports.
4578 * <p>
4579 * This method accepts block names in the following forms:
4580 * <ol>
4581 * <li> Canonical block names as defined by the Unicode Standard.
4582 * For example, the standard defines a "Basic Latin" block. Therefore, this
4583 * method accepts "Basic Latin" as a valid block name. The documentation of
4584 * each UnicodeBlock provides the canonical name.
4585 * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
4586 * is a valid block name for the "Basic Latin" block.
4587 * <li>The text representation of each constant UnicodeBlock identifier.
4588 * For example, this method will return the {@link #BASIC_LATIN} block if
4589 * provided with the "BASIC_LATIN" name. This form replaces all spaces and
4590 * hyphens in the canonical name with underscores.
4591 * </ol>
4592 * Finally, character case is ignored for all of the valid block name forms.
4593 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
4594 * The en_US locale's case mapping rules are used to provide case-insensitive
4595 * string comparisons for block name validation.
4596 * <p>
4597 * If the Unicode Standard changes block names, both the previous and
4598 * current names will be accepted.
4599 *
4600 * @param blockName A {@code UnicodeBlock} name.
4601 * @return The {@code UnicodeBlock} instance identified
4602 * by {@code blockName}
4603 * @throws IllegalArgumentException if {@code blockName} is an
4604 * invalid name
4605 * @throws NullPointerException if {@code blockName} is null
4606 * @since 1.5
4607 */
4608 public static final UnicodeBlock forName(String blockName) {
4609 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
4610 if (block == null) {
4611 throw new IllegalArgumentException("Not a valid block name: "
4612 + blockName);
4613 }
4614 return block;
4615 }
4616 }
4617
4618
4619 /**
4620 * A family of character subsets representing the character scripts
4621 * defined in the <a href="http://www.unicode.org/reports/tr24/">
4622 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
4623 * character is assigned to a single Unicode script, either a specific
4624 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
4625 * one of the following three special values,
4626 * {@link Character.UnicodeScript#INHERITED Inherited},
4627 * {@link Character.UnicodeScript#COMMON Common} or
4628 * {@link Character.UnicodeScript#UNKNOWN Unknown}.
4629 *
4630 * @spec https://www.unicode.org/reports/tr24 Unicode Script Property
4631 * @since 1.7
4632 */
4633 public static enum UnicodeScript {
4634
4635 /**
4636 * Unicode script "Common".
4637 */
4638 COMMON,
4639
4640 /**
4641 * Unicode script "Latin".
4642 */
4643 LATIN,
4644
4645 /**
4646 * Unicode script "Greek".
4647 */
4648 GREEK,
4649
4650 /**
4651 * Unicode script "Cyrillic".
4652 */
4653 CYRILLIC,
4654
4655 /**
4656 * Unicode script "Armenian".
4657 */
4658 ARMENIAN,
4659
4660 /**
4661 * Unicode script "Hebrew".
4662 */
4663 HEBREW,
4664
4665 /**
4666 * Unicode script "Arabic".
4667 */
4668 ARABIC,
4669
4670 /**
4671 * Unicode script "Syriac".
4672 */
4673 SYRIAC,
4674
4675 /**
4676 * Unicode script "Thaana".
4677 */
4678 THAANA,
4679
4680 /**
4681 * Unicode script "Devanagari".
4682 */
4683 DEVANAGARI,
4684
4685 /**
4686 * Unicode script "Bengali".
4687 */
4688 BENGALI,
4689
4690 /**
4691 * Unicode script "Gurmukhi".
4692 */
4693 GURMUKHI,
4694
4695 /**
4696 * Unicode script "Gujarati".
4697 */
4698 GUJARATI,
4699
4700 /**
4701 * Unicode script "Oriya".
4702 */
4703 ORIYA,
4704
4705 /**
4706 * Unicode script "Tamil".
4707 */
4708 TAMIL,
4709
4710 /**
4711 * Unicode script "Telugu".
4712 */
4713 TELUGU,
4714
4715 /**
4716 * Unicode script "Kannada".
4717 */
4718 KANNADA,
4719
4720 /**
4721 * Unicode script "Malayalam".
4722 */
4723 MALAYALAM,
4724
4725 /**
4726 * Unicode script "Sinhala".
4727 */
4728 SINHALA,
4729
4730 /**
4731 * Unicode script "Thai".
4732 */
4733 THAI,
4734
4735 /**
4736 * Unicode script "Lao".
4737 */
4738 LAO,
4739
4740 /**
4741 * Unicode script "Tibetan".
4742 */
4743 TIBETAN,
4744
4745 /**
4746 * Unicode script "Myanmar".
4747 */
4748 MYANMAR,
4749
4750 /**
4751 * Unicode script "Georgian".
4752 */
4753 GEORGIAN,
4754
4755 /**
4756 * Unicode script "Hangul".
4757 */
4758 HANGUL,
4759
4760 /**
4761 * Unicode script "Ethiopic".
4762 */
4763 ETHIOPIC,
4764
4765 /**
4766 * Unicode script "Cherokee".
4767 */
4768 CHEROKEE,
4769
4770 /**
4771 * Unicode script "Canadian_Aboriginal".
4772 */
4773 CANADIAN_ABORIGINAL,
4774
4775 /**
4776 * Unicode script "Ogham".
4777 */
4778 OGHAM,
4779
4780 /**
4781 * Unicode script "Runic".
4782 */
4783 RUNIC,
4784
4785 /**
4786 * Unicode script "Khmer".
4787 */
4788 KHMER,
4789
4790 /**
4791 * Unicode script "Mongolian".
4792 */
4793 MONGOLIAN,
4794
4795 /**
4796 * Unicode script "Hiragana".
4797 */
4798 HIRAGANA,
4799
4800 /**
4801 * Unicode script "Katakana".
4802 */
4803 KATAKANA,
4804
4805 /**
4806 * Unicode script "Bopomofo".
4807 */
4808 BOPOMOFO,
4809
4810 /**
4811 * Unicode script "Han".
4812 */
4813 HAN,
4814
4815 /**
4816 * Unicode script "Yi".
4817 */
4818 YI,
4819
4820 /**
4821 * Unicode script "Old_Italic".
4822 */
4823 OLD_ITALIC,
4824
4825 /**
4826 * Unicode script "Gothic".
4827 */
4828 GOTHIC,
4829
4830 /**
4831 * Unicode script "Deseret".
4832 */
4833 DESERET,
4834
4835 /**
4836 * Unicode script "Inherited".
4837 */
4838 INHERITED,
4839
4840 /**
4841 * Unicode script "Tagalog".
4842 */
4843 TAGALOG,
4844
4845 /**
4846 * Unicode script "Hanunoo".
4847 */
4848 HANUNOO,
4849
4850 /**
4851 * Unicode script "Buhid".
4852 */
4853 BUHID,
4854
4855 /**
4856 * Unicode script "Tagbanwa".
4857 */
4858 TAGBANWA,
4859
4860 /**
4861 * Unicode script "Limbu".
4862 */
4863 LIMBU,
4864
4865 /**
4866 * Unicode script "Tai_Le".
4867 */
4868 TAI_LE,
4869
4870 /**
4871 * Unicode script "Linear_B".
4872 */
4873 LINEAR_B,
4874
4875 /**
4876 * Unicode script "Ugaritic".
4877 */
4878 UGARITIC,
4879
4880 /**
4881 * Unicode script "Shavian".
4882 */
4883 SHAVIAN,
4884
4885 /**
4886 * Unicode script "Osmanya".
4887 */
4888 OSMANYA,
4889
4890 /**
4891 * Unicode script "Cypriot".
4892 */
4893 CYPRIOT,
4894
4895 /**
4896 * Unicode script "Braille".
4897 */
4898 BRAILLE,
4899
4900 /**
4901 * Unicode script "Buginese".
4902 */
4903 BUGINESE,
4904
4905 /**
4906 * Unicode script "Coptic".
4907 */
4908 COPTIC,
4909
4910 /**
4911 * Unicode script "New_Tai_Lue".
4912 */
4913 NEW_TAI_LUE,
4914
4915 /**
4916 * Unicode script "Glagolitic".
4917 */
4918 GLAGOLITIC,
4919
4920 /**
4921 * Unicode script "Tifinagh".
4922 */
4923 TIFINAGH,
4924
4925 /**
4926 * Unicode script "Syloti_Nagri".
4927 */
4928 SYLOTI_NAGRI,
4929
4930 /**
4931 * Unicode script "Old_Persian".
4932 */
4933 OLD_PERSIAN,
4934
4935 /**
4936 * Unicode script "Kharoshthi".
4937 */
4938 KHAROSHTHI,
4939
4940 /**
4941 * Unicode script "Balinese".
4942 */
4943 BALINESE,
4944
4945 /**
4946 * Unicode script "Cuneiform".
4947 */
4948 CUNEIFORM,
4949
4950 /**
4951 * Unicode script "Phoenician".
4952 */
4953 PHOENICIAN,
4954
4955 /**
4956 * Unicode script "Phags_Pa".
4957 */
4958 PHAGS_PA,
4959
4960 /**
4961 * Unicode script "Nko".
4962 */
4963 NKO,
4964
4965 /**
4966 * Unicode script "Sundanese".
4967 */
4968 SUNDANESE,
4969
4970 /**
4971 * Unicode script "Batak".
4972 */
4973 BATAK,
4974
4975 /**
4976 * Unicode script "Lepcha".
4977 */
4978 LEPCHA,
4979
4980 /**
4981 * Unicode script "Ol_Chiki".
4982 */
4983 OL_CHIKI,
4984
4985 /**
4986 * Unicode script "Vai".
4987 */
4988 VAI,
4989
4990 /**
4991 * Unicode script "Saurashtra".
4992 */
4993 SAURASHTRA,
4994
4995 /**
4996 * Unicode script "Kayah_Li".
4997 */
4998 KAYAH_LI,
4999
5000 /**
5001 * Unicode script "Rejang".
5002 */
5003 REJANG,
5004
5005 /**
5006 * Unicode script "Lycian".
5007 */
5008 LYCIAN,
5009
5010 /**
5011 * Unicode script "Carian".
5012 */
5013 CARIAN,
5014
5015 /**
5016 * Unicode script "Lydian".
5017 */
5018 LYDIAN,
5019
5020 /**
5021 * Unicode script "Cham".
5022 */
5023 CHAM,
5024
5025 /**
5026 * Unicode script "Tai_Tham".
5027 */
5028 TAI_THAM,
5029
5030 /**
5031 * Unicode script "Tai_Viet".
5032 */
5033 TAI_VIET,
5034
5035 /**
5036 * Unicode script "Avestan".
5037 */
5038 AVESTAN,
5039
5040 /**
5041 * Unicode script "Egyptian_Hieroglyphs".
5042 */
5043 EGYPTIAN_HIEROGLYPHS,
5044
5045 /**
5046 * Unicode script "Samaritan".
5047 */
5048 SAMARITAN,
5049
5050 /**
5051 * Unicode script "Mandaic".
5052 */
5053 MANDAIC,
5054
5055 /**
5056 * Unicode script "Lisu".
5057 */
5058 LISU,
5059
5060 /**
5061 * Unicode script "Bamum".
5062 */
5063 BAMUM,
5064
5065 /**
5066 * Unicode script "Javanese".
5067 */
5068 JAVANESE,
5069
5070 /**
5071 * Unicode script "Meetei_Mayek".
5072 */
5073 MEETEI_MAYEK,
5074
5075 /**
5076 * Unicode script "Imperial_Aramaic".
5077 */
5078 IMPERIAL_ARAMAIC,
5079
5080 /**
5081 * Unicode script "Old_South_Arabian".
5082 */
5083 OLD_SOUTH_ARABIAN,
5084
5085 /**
5086 * Unicode script "Inscriptional_Parthian".
5087 */
5088 INSCRIPTIONAL_PARTHIAN,
5089
5090 /**
5091 * Unicode script "Inscriptional_Pahlavi".
5092 */
5093 INSCRIPTIONAL_PAHLAVI,
5094
5095 /**
5096 * Unicode script "Old_Turkic".
5097 */
5098 OLD_TURKIC,
5099
5100 /**
5101 * Unicode script "Brahmi".
5102 */
5103 BRAHMI,
5104
5105 /**
5106 * Unicode script "Kaithi".
5107 */
5108 KAITHI,
5109
5110 /**
5111 * Unicode script "Meroitic Hieroglyphs".
5112 * @since 1.8
5113 */
5114 MEROITIC_HIEROGLYPHS,
5115
5116 /**
5117 * Unicode script "Meroitic Cursive".
5118 * @since 1.8
5119 */
5120 MEROITIC_CURSIVE,
5121
5122 /**
5123 * Unicode script "Sora Sompeng".
5124 * @since 1.8
5125 */
5126 SORA_SOMPENG,
5127
5128 /**
5129 * Unicode script "Chakma".
5130 * @since 1.8
5131 */
5132 CHAKMA,
5133
5134 /**
5135 * Unicode script "Sharada".
5136 * @since 1.8
5137 */
5138 SHARADA,
5139
5140 /**
5141 * Unicode script "Takri".
5142 * @since 1.8
5143 */
5144 TAKRI,
5145
5146 /**
5147 * Unicode script "Miao".
5148 * @since 1.8
5149 */
5150 MIAO,
5151
5152 /**
5153 * Unicode script "Caucasian Albanian".
5154 * @since 9
5155 */
5156 CAUCASIAN_ALBANIAN,
5157
5158 /**
5159 * Unicode script "Bassa Vah".
5160 * @since 9
5161 */
5162 BASSA_VAH,
5163
5164 /**
5165 * Unicode script "Duployan".
5166 * @since 9
5167 */
5168 DUPLOYAN,
5169
5170 /**
5171 * Unicode script "Elbasan".
5172 * @since 9
5173 */
5174 ELBASAN,
5175
5176 /**
5177 * Unicode script "Grantha".
5178 * @since 9
5179 */
5180 GRANTHA,
5181
5182 /**
5183 * Unicode script "Pahawh Hmong".
5184 * @since 9
5185 */
5186 PAHAWH_HMONG,
5187
5188 /**
5189 * Unicode script "Khojki".
5190 * @since 9
5191 */
5192 KHOJKI,
5193
5194 /**
5195 * Unicode script "Linear A".
5196 * @since 9
5197 */
5198 LINEAR_A,
5199
5200 /**
5201 * Unicode script "Mahajani".
5202 * @since 9
5203 */
5204 MAHAJANI,
5205
5206 /**
5207 * Unicode script "Manichaean".
5208 * @since 9
5209 */
5210 MANICHAEAN,
5211
5212 /**
5213 * Unicode script "Mende Kikakui".
5214 * @since 9
5215 */
5216 MENDE_KIKAKUI,
5217
5218 /**
5219 * Unicode script "Modi".
5220 * @since 9
5221 */
5222 MODI,
5223
5224 /**
5225 * Unicode script "Mro".
5226 * @since 9
5227 */
5228 MRO,
5229
5230 /**
5231 * Unicode script "Old North Arabian".
5232 * @since 9
5233 */
5234 OLD_NORTH_ARABIAN,
5235
5236 /**
5237 * Unicode script "Nabataean".
5238 * @since 9
5239 */
5240 NABATAEAN,
5241
5242 /**
5243 * Unicode script "Palmyrene".
5244 * @since 9
5245 */
5246 PALMYRENE,
5247
5248 /**
5249 * Unicode script "Pau Cin Hau".
5250 * @since 9
5251 */
5252 PAU_CIN_HAU,
5253
5254 /**
5255 * Unicode script "Old Permic".
5256 * @since 9
5257 */
5258 OLD_PERMIC,
5259
5260 /**
5261 * Unicode script "Psalter Pahlavi".
5262 * @since 9
5263 */
5264 PSALTER_PAHLAVI,
5265
5266 /**
5267 * Unicode script "Siddham".
5268 * @since 9
5269 */
5270 SIDDHAM,
5271
5272 /**
5273 * Unicode script "Khudawadi".
5274 * @since 9
5275 */
5276 KHUDAWADI,
5277
5278 /**
5279 * Unicode script "Tirhuta".
5280 * @since 9
5281 */
5282 TIRHUTA,
5283
5284 /**
5285 * Unicode script "Warang Citi".
5286 * @since 9
5287 */
5288 WARANG_CITI,
5289
5290 /**
5291 * Unicode script "Ahom".
5292 * @since 9
5293 */
5294 AHOM,
5295
5296 /**
5297 * Unicode script "Anatolian Hieroglyphs".
5298 * @since 9
5299 */
5300 ANATOLIAN_HIEROGLYPHS,
5301
5302 /**
5303 * Unicode script "Hatran".
5304 * @since 9
5305 */
5306 HATRAN,
5307
5308 /**
5309 * Unicode script "Multani".
5310 * @since 9
5311 */
5312 MULTANI,
5313
5314 /**
5315 * Unicode script "Old Hungarian".
5316 * @since 9
5317 */
5318 OLD_HUNGARIAN,
5319
5320 /**
5321 * Unicode script "SignWriting".
5322 * @since 9
5323 */
5324 SIGNWRITING,
5325
5326 /**
5327 * Unicode script "Adlam".
5328 * @since 11
5329 */
5330 ADLAM,
5331
5332 /**
5333 * Unicode script "Bhaiksuki".
5334 * @since 11
5335 */
5336 BHAIKSUKI,
5337
5338 /**
5339 * Unicode script "Marchen".
5340 * @since 11
5341 */
5342 MARCHEN,
5343
5344 /**
5345 * Unicode script "Newa".
5346 * @since 11
5347 */
5348 NEWA,
5349
5350 /**
5351 * Unicode script "Osage".
5352 * @since 11
5353 */
5354 OSAGE,
5355
5356 /**
5357 * Unicode script "Tangut".
5358 * @since 11
5359 */
5360 TANGUT,
5361
5362 /**
5363 * Unicode script "Masaram Gondi".
5364 * @since 11
5365 */
5366 MASARAM_GONDI,
5367
5368 /**
5369 * Unicode script "Nushu".
5370 * @since 11
5371 */
5372 NUSHU,
5373
5374 /**
5375 * Unicode script "Soyombo".
5376 * @since 11
5377 */
5378 SOYOMBO,
5379
5380 /**
5381 * Unicode script "Zanabazar Square".
5382 * @since 11
5383 */
5384 ZANABAZAR_SQUARE,
5385
5386 /**
5387 * Unicode script "Hanifi Rohingya".
5388 * @since 12
5389 */
5390 HANIFI_ROHINGYA,
5391
5392 /**
5393 * Unicode script "Old Sogdian".
5394 * @since 12
5395 */
5396 OLD_SOGDIAN,
5397
5398 /**
5399 * Unicode script "Sogdian".
5400 * @since 12
5401 */
5402 SOGDIAN,
5403
5404 /**
5405 * Unicode script "Dogra".
5406 * @since 12
5407 */
5408 DOGRA,
5409
5410 /**
5411 * Unicode script "Gunjala Gondi".
5412 * @since 12
5413 */
5414 GUNJALA_GONDI,
5415
5416 /**
5417 * Unicode script "Makasar".
5418 * @since 12
5419 */
5420 MAKASAR,
5421
5422 /**
5423 * Unicode script "Medefaidrin".
5424 * @since 12
5425 */
5426 MEDEFAIDRIN,
5427
5428 /**
5429 * Unicode script "Elymaic".
5430 * @since 13
5431 */
5432 ELYMAIC,
5433
5434 /**
5435 * Unicode script "Nandinagari".
5436 * @since 13
5437 */
5438 NANDINAGARI,
5439
5440 /**
5441 * Unicode script "Nyiakeng Puachue Hmong".
5442 * @since 13
5443 */
5444 NYIAKENG_PUACHUE_HMONG,
5445
5446 /**
5447 * Unicode script "Wancho".
5448 * @since 13
5449 */
5450 WANCHO,
5451
5452 /**
5453 * Unicode script "Yezidi".
5454 * @since 15
5455 */
5456 YEZIDI,
5457
5458 /**
5459 * Unicode script "Chorasmian".
5460 * @since 15
5461 */
5462 CHORASMIAN,
5463
5464 /**
5465 * Unicode script "Dives Akuru".
5466 * @since 15
5467 */
5468 DIVES_AKURU,
5469
5470 /**
5471 * Unicode script "Khitan Small Script".
5472 * @since 15
5473 */
5474 KHITAN_SMALL_SCRIPT,
5475
5476 /**
5477 * Unicode script "Vithkuqi".
5478 * @since 19
5479 */
5480 VITHKUQI,
5481
5482 /**
5483 * Unicode script "Old Uyghur".
5484 * @since 19
5485 */
5486 OLD_UYGHUR,
5487
5488 /**
5489 * Unicode script "Cypro Minoan".
5490 * @since 19
5491 */
5492 CYPRO_MINOAN,
5493
5494 /**
5495 * Unicode script "Tangsa".
5496 * @since 19
5497 */
5498 TANGSA,
5499
5500 /**
5501 * Unicode script "Toto".
5502 * @since 19
5503 */
5504 TOTO,
5505
5506 /**
5507 * Unicode script "Kawi".
5508 * @since 20
5509 */
5510 KAWI,
5511
5512 /**
5513 * Unicode script "Nag Mundari".
5514 * @since 20
5515 */
5516 NAG_MUNDARI,
5517
5518 /**
5519 * Unicode script "Todhri".
5520 * @since 24
5521 */
5522 TODHRI,
5523
5524 /**
5525 * Unicode script "Garay".
5526 * @since 24
5527 */
5528 GARAY,
5529
5530 /**
5531 * Unicode script "Tulu Tigalari".
5532 * @since 24
5533 */
5534 TULU_TIGALARI,
5535
5536 /**
5537 * Unicode script "Sunuwar".
5538 * @since 24
5539 */
5540 SUNUWAR,
5541
5542 /**
5543 * Unicode script "Gurung Khema".
5544 * @since 24
5545 */
5546 GURUNG_KHEMA,
5547
5548 /**
5549 * Unicode script "Kirat Rai".
5550 * @since 24
5551 */
5552 KIRAT_RAI,
5553
5554 /**
5555 * Unicode script "Ol Onal".
5556 * @since 24
5557 */
5558 OL_ONAL,
5559
5560 /**
5561 * Unicode script "Unknown".
5562 */
5563 UNKNOWN; // must be the last enum constant for calculating the size of "aliases" hash map.
5564
5565 private static final int[] scriptStarts = {
5566 0x0000, // 0000..0040; COMMON
5567 0x0041, // 0041..005A; LATIN
5568 0x005B, // 005B..0060; COMMON
5569 0x0061, // 0061..007A; LATIN
5570 0x007B, // 007B..00A9; COMMON
5571 0x00AA, // 00AA ; LATIN
5572 0x00AB, // 00AB..00B9; COMMON
5573 0x00BA, // 00BA ; LATIN
5574 0x00BB, // 00BB..00BF; COMMON
5575 0x00C0, // 00C0..00D6; LATIN
5576 0x00D7, // 00D7 ; COMMON
5577 0x00D8, // 00D8..00F6; LATIN
5578 0x00F7, // 00F7 ; COMMON
5579 0x00F8, // 00F8..02B8; LATIN
5580 0x02B9, // 02B9..02DF; COMMON
5581 0x02E0, // 02E0..02E4; LATIN
5582 0x02E5, // 02E5..02E9; COMMON
5583 0x02EA, // 02EA..02EB; BOPOMOFO
5584 0x02EC, // 02EC..02FF; COMMON
5585 0x0300, // 0300..036F; INHERITED
5586 0x0370, // 0370..0373; GREEK
5587 0x0374, // 0374 ; COMMON
5588 0x0375, // 0375..0377; GREEK
5589 0x0378, // 0378..0379; UNKNOWN
5590 0x037A, // 037A..037D; GREEK
5591 0x037E, // 037E ; COMMON
5592 0x037F, // 037F ; GREEK
5593 0x0380, // 0380..0383; UNKNOWN
5594 0x0384, // 0384 ; GREEK
5595 0x0385, // 0385 ; COMMON
5596 0x0386, // 0386 ; GREEK
5597 0x0387, // 0387 ; COMMON
5598 0x0388, // 0388..038A; GREEK
5599 0x038B, // 038B ; UNKNOWN
5600 0x038C, // 038C ; GREEK
5601 0x038D, // 038D ; UNKNOWN
5602 0x038E, // 038E..03A1; GREEK
5603 0x03A2, // 03A2 ; UNKNOWN
5604 0x03A3, // 03A3..03E1; GREEK
5605 0x03E2, // 03E2..03EF; COPTIC
5606 0x03F0, // 03F0..03FF; GREEK
5607 0x0400, // 0400..0484; CYRILLIC
5608 0x0485, // 0485..0486; INHERITED
5609 0x0487, // 0487..052F; CYRILLIC
5610 0x0530, // 0530 ; UNKNOWN
5611 0x0531, // 0531..0556; ARMENIAN
5612 0x0557, // 0557..0558; UNKNOWN
5613 0x0559, // 0559..058A; ARMENIAN
5614 0x058B, // 058B..058C; UNKNOWN
5615 0x058D, // 058D..058F; ARMENIAN
5616 0x0590, // 0590 ; UNKNOWN
5617 0x0591, // 0591..05C7; HEBREW
5618 0x05C8, // 05C8..05CF; UNKNOWN
5619 0x05D0, // 05D0..05EA; HEBREW
5620 0x05EB, // 05EB..05EE; UNKNOWN
5621 0x05EF, // 05EF..05F4; HEBREW
5622 0x05F5, // 05F5..05FF; UNKNOWN
5623 0x0600, // 0600..0604; ARABIC
5624 0x0605, // 0605 ; COMMON
5625 0x0606, // 0606..060B; ARABIC
5626 0x060C, // 060C ; COMMON
5627 0x060D, // 060D..061A; ARABIC
5628 0x061B, // 061B ; COMMON
5629 0x061C, // 061C..061E; ARABIC
5630 0x061F, // 061F ; COMMON
5631 0x0620, // 0620..063F; ARABIC
5632 0x0640, // 0640 ; COMMON
5633 0x0641, // 0641..064A; ARABIC
5634 0x064B, // 064B..0655; INHERITED
5635 0x0656, // 0656..066F; ARABIC
5636 0x0670, // 0670 ; INHERITED
5637 0x0671, // 0671..06DC; ARABIC
5638 0x06DD, // 06DD ; COMMON
5639 0x06DE, // 06DE..06FF; ARABIC
5640 0x0700, // 0700..070D; SYRIAC
5641 0x070E, // 070E ; UNKNOWN
5642 0x070F, // 070F..074A; SYRIAC
5643 0x074B, // 074B..074C; UNKNOWN
5644 0x074D, // 074D..074F; SYRIAC
5645 0x0750, // 0750..077F; ARABIC
5646 0x0780, // 0780..07B1; THAANA
5647 0x07B2, // 07B2..07BF; UNKNOWN
5648 0x07C0, // 07C0..07FA; NKO
5649 0x07FB, // 07FB..07FC; UNKNOWN
5650 0x07FD, // 07FD..07FF; NKO
5651 0x0800, // 0800..082D; SAMARITAN
5652 0x082E, // 082E..082F; UNKNOWN
5653 0x0830, // 0830..083E; SAMARITAN
5654 0x083F, // 083F ; UNKNOWN
5655 0x0840, // 0840..085B; MANDAIC
5656 0x085C, // 085C..085D; UNKNOWN
5657 0x085E, // 085E ; MANDAIC
5658 0x085F, // 085F ; UNKNOWN
5659 0x0860, // 0860..086A; SYRIAC
5660 0x086B, // 086B..086F; UNKNOWN
5661 0x0870, // 0870..088E; ARABIC
5662 0x088F, // 088F ; UNKNOWN
5663 0x0890, // 0890..0891; ARABIC
5664 0x0892, // 0892..0896; UNKNOWN
5665 0x0897, // 0897..08E1; ARABIC
5666 0x08E2, // 08E2 ; COMMON
5667 0x08E3, // 08E3..08FF; ARABIC
5668 0x0900, // 0900..0950; DEVANAGARI
5669 0x0951, // 0951..0954; INHERITED
5670 0x0955, // 0955..0963; DEVANAGARI
5671 0x0964, // 0964..0965; COMMON
5672 0x0966, // 0966..097F; DEVANAGARI
5673 0x0980, // 0980..0983; BENGALI
5674 0x0984, // 0984 ; UNKNOWN
5675 0x0985, // 0985..098C; BENGALI
5676 0x098D, // 098D..098E; UNKNOWN
5677 0x098F, // 098F..0990; BENGALI
5678 0x0991, // 0991..0992; UNKNOWN
5679 0x0993, // 0993..09A8; BENGALI
5680 0x09A9, // 09A9 ; UNKNOWN
5681 0x09AA, // 09AA..09B0; BENGALI
5682 0x09B1, // 09B1 ; UNKNOWN
5683 0x09B2, // 09B2 ; BENGALI
5684 0x09B3, // 09B3..09B5; UNKNOWN
5685 0x09B6, // 09B6..09B9; BENGALI
5686 0x09BA, // 09BA..09BB; UNKNOWN
5687 0x09BC, // 09BC..09C4; BENGALI
5688 0x09C5, // 09C5..09C6; UNKNOWN
5689 0x09C7, // 09C7..09C8; BENGALI
5690 0x09C9, // 09C9..09CA; UNKNOWN
5691 0x09CB, // 09CB..09CE; BENGALI
5692 0x09CF, // 09CF..09D6; UNKNOWN
5693 0x09D7, // 09D7 ; BENGALI
5694 0x09D8, // 09D8..09DB; UNKNOWN
5695 0x09DC, // 09DC..09DD; BENGALI
5696 0x09DE, // 09DE ; UNKNOWN
5697 0x09DF, // 09DF..09E3; BENGALI
5698 0x09E4, // 09E4..09E5; UNKNOWN
5699 0x09E6, // 09E6..09FE; BENGALI
5700 0x09FF, // 09FF..0A00; UNKNOWN
5701 0x0A01, // 0A01..0A03; GURMUKHI
5702 0x0A04, // 0A04 ; UNKNOWN
5703 0x0A05, // 0A05..0A0A; GURMUKHI
5704 0x0A0B, // 0A0B..0A0E; UNKNOWN
5705 0x0A0F, // 0A0F..0A10; GURMUKHI
5706 0x0A11, // 0A11..0A12; UNKNOWN
5707 0x0A13, // 0A13..0A28; GURMUKHI
5708 0x0A29, // 0A29 ; UNKNOWN
5709 0x0A2A, // 0A2A..0A30; GURMUKHI
5710 0x0A31, // 0A31 ; UNKNOWN
5711 0x0A32, // 0A32..0A33; GURMUKHI
5712 0x0A34, // 0A34 ; UNKNOWN
5713 0x0A35, // 0A35..0A36; GURMUKHI
5714 0x0A37, // 0A37 ; UNKNOWN
5715 0x0A38, // 0A38..0A39; GURMUKHI
5716 0x0A3A, // 0A3A..0A3B; UNKNOWN
5717 0x0A3C, // 0A3C ; GURMUKHI
5718 0x0A3D, // 0A3D ; UNKNOWN
5719 0x0A3E, // 0A3E..0A42; GURMUKHI
5720 0x0A43, // 0A43..0A46; UNKNOWN
5721 0x0A47, // 0A47..0A48; GURMUKHI
5722 0x0A49, // 0A49..0A4A; UNKNOWN
5723 0x0A4B, // 0A4B..0A4D; GURMUKHI
5724 0x0A4E, // 0A4E..0A50; UNKNOWN
5725 0x0A51, // 0A51 ; GURMUKHI
5726 0x0A52, // 0A52..0A58; UNKNOWN
5727 0x0A59, // 0A59..0A5C; GURMUKHI
5728 0x0A5D, // 0A5D ; UNKNOWN
5729 0x0A5E, // 0A5E ; GURMUKHI
5730 0x0A5F, // 0A5F..0A65; UNKNOWN
5731 0x0A66, // 0A66..0A76; GURMUKHI
5732 0x0A77, // 0A77..0A80; UNKNOWN
5733 0x0A81, // 0A81..0A83; GUJARATI
5734 0x0A84, // 0A84 ; UNKNOWN
5735 0x0A85, // 0A85..0A8D; GUJARATI
5736 0x0A8E, // 0A8E ; UNKNOWN
5737 0x0A8F, // 0A8F..0A91; GUJARATI
5738 0x0A92, // 0A92 ; UNKNOWN
5739 0x0A93, // 0A93..0AA8; GUJARATI
5740 0x0AA9, // 0AA9 ; UNKNOWN
5741 0x0AAA, // 0AAA..0AB0; GUJARATI
5742 0x0AB1, // 0AB1 ; UNKNOWN
5743 0x0AB2, // 0AB2..0AB3; GUJARATI
5744 0x0AB4, // 0AB4 ; UNKNOWN
5745 0x0AB5, // 0AB5..0AB9; GUJARATI
5746 0x0ABA, // 0ABA..0ABB; UNKNOWN
5747 0x0ABC, // 0ABC..0AC5; GUJARATI
5748 0x0AC6, // 0AC6 ; UNKNOWN
5749 0x0AC7, // 0AC7..0AC9; GUJARATI
5750 0x0ACA, // 0ACA ; UNKNOWN
5751 0x0ACB, // 0ACB..0ACD; GUJARATI
5752 0x0ACE, // 0ACE..0ACF; UNKNOWN
5753 0x0AD0, // 0AD0 ; GUJARATI
5754 0x0AD1, // 0AD1..0ADF; UNKNOWN
5755 0x0AE0, // 0AE0..0AE3; GUJARATI
5756 0x0AE4, // 0AE4..0AE5; UNKNOWN
5757 0x0AE6, // 0AE6..0AF1; GUJARATI
5758 0x0AF2, // 0AF2..0AF8; UNKNOWN
5759 0x0AF9, // 0AF9..0AFF; GUJARATI
5760 0x0B00, // 0B00 ; UNKNOWN
5761 0x0B01, // 0B01..0B03; ORIYA
5762 0x0B04, // 0B04 ; UNKNOWN
5763 0x0B05, // 0B05..0B0C; ORIYA
5764 0x0B0D, // 0B0D..0B0E; UNKNOWN
5765 0x0B0F, // 0B0F..0B10; ORIYA
5766 0x0B11, // 0B11..0B12; UNKNOWN
5767 0x0B13, // 0B13..0B28; ORIYA
5768 0x0B29, // 0B29 ; UNKNOWN
5769 0x0B2A, // 0B2A..0B30; ORIYA
5770 0x0B31, // 0B31 ; UNKNOWN
5771 0x0B32, // 0B32..0B33; ORIYA
5772 0x0B34, // 0B34 ; UNKNOWN
5773 0x0B35, // 0B35..0B39; ORIYA
5774 0x0B3A, // 0B3A..0B3B; UNKNOWN
5775 0x0B3C, // 0B3C..0B44; ORIYA
5776 0x0B45, // 0B45..0B46; UNKNOWN
5777 0x0B47, // 0B47..0B48; ORIYA
5778 0x0B49, // 0B49..0B4A; UNKNOWN
5779 0x0B4B, // 0B4B..0B4D; ORIYA
5780 0x0B4E, // 0B4E..0B54; UNKNOWN
5781 0x0B55, // 0B55..0B57; ORIYA
5782 0x0B58, // 0B58..0B5B; UNKNOWN
5783 0x0B5C, // 0B5C..0B5D; ORIYA
5784 0x0B5E, // 0B5E ; UNKNOWN
5785 0x0B5F, // 0B5F..0B63; ORIYA
5786 0x0B64, // 0B64..0B65; UNKNOWN
5787 0x0B66, // 0B66..0B77; ORIYA
5788 0x0B78, // 0B78..0B81; UNKNOWN
5789 0x0B82, // 0B82..0B83; TAMIL
5790 0x0B84, // 0B84 ; UNKNOWN
5791 0x0B85, // 0B85..0B8A; TAMIL
5792 0x0B8B, // 0B8B..0B8D; UNKNOWN
5793 0x0B8E, // 0B8E..0B90; TAMIL
5794 0x0B91, // 0B91 ; UNKNOWN
5795 0x0B92, // 0B92..0B95; TAMIL
5796 0x0B96, // 0B96..0B98; UNKNOWN
5797 0x0B99, // 0B99..0B9A; TAMIL
5798 0x0B9B, // 0B9B ; UNKNOWN
5799 0x0B9C, // 0B9C ; TAMIL
5800 0x0B9D, // 0B9D ; UNKNOWN
5801 0x0B9E, // 0B9E..0B9F; TAMIL
5802 0x0BA0, // 0BA0..0BA2; UNKNOWN
5803 0x0BA3, // 0BA3..0BA4; TAMIL
5804 0x0BA5, // 0BA5..0BA7; UNKNOWN
5805 0x0BA8, // 0BA8..0BAA; TAMIL
5806 0x0BAB, // 0BAB..0BAD; UNKNOWN
5807 0x0BAE, // 0BAE..0BB9; TAMIL
5808 0x0BBA, // 0BBA..0BBD; UNKNOWN
5809 0x0BBE, // 0BBE..0BC2; TAMIL
5810 0x0BC3, // 0BC3..0BC5; UNKNOWN
5811 0x0BC6, // 0BC6..0BC8; TAMIL
5812 0x0BC9, // 0BC9 ; UNKNOWN
5813 0x0BCA, // 0BCA..0BCD; TAMIL
5814 0x0BCE, // 0BCE..0BCF; UNKNOWN
5815 0x0BD0, // 0BD0 ; TAMIL
5816 0x0BD1, // 0BD1..0BD6; UNKNOWN
5817 0x0BD7, // 0BD7 ; TAMIL
5818 0x0BD8, // 0BD8..0BE5; UNKNOWN
5819 0x0BE6, // 0BE6..0BFA; TAMIL
5820 0x0BFB, // 0BFB..0BFF; UNKNOWN
5821 0x0C00, // 0C00..0C0C; TELUGU
5822 0x0C0D, // 0C0D ; UNKNOWN
5823 0x0C0E, // 0C0E..0C10; TELUGU
5824 0x0C11, // 0C11 ; UNKNOWN
5825 0x0C12, // 0C12..0C28; TELUGU
5826 0x0C29, // 0C29 ; UNKNOWN
5827 0x0C2A, // 0C2A..0C39; TELUGU
5828 0x0C3A, // 0C3A..0C3B; UNKNOWN
5829 0x0C3C, // 0C3C..0C44; TELUGU
5830 0x0C45, // 0C45 ; UNKNOWN
5831 0x0C46, // 0C46..0C48; TELUGU
5832 0x0C49, // 0C49 ; UNKNOWN
5833 0x0C4A, // 0C4A..0C4D; TELUGU
5834 0x0C4E, // 0C4E..0C54; UNKNOWN
5835 0x0C55, // 0C55..0C56; TELUGU
5836 0x0C57, // 0C57 ; UNKNOWN
5837 0x0C58, // 0C58..0C5A; TELUGU
5838 0x0C5B, // 0C5B..0C5C; UNKNOWN
5839 0x0C5D, // 0C5D ; TELUGU
5840 0x0C5E, // 0C5E..0C5F; UNKNOWN
5841 0x0C60, // 0C60..0C63; TELUGU
5842 0x0C64, // 0C64..0C65; UNKNOWN
5843 0x0C66, // 0C66..0C6F; TELUGU
5844 0x0C70, // 0C70..0C76; UNKNOWN
5845 0x0C77, // 0C77..0C7F; TELUGU
5846 0x0C80, // 0C80..0C8C; KANNADA
5847 0x0C8D, // 0C8D ; UNKNOWN
5848 0x0C8E, // 0C8E..0C90; KANNADA
5849 0x0C91, // 0C91 ; UNKNOWN
5850 0x0C92, // 0C92..0CA8; KANNADA
5851 0x0CA9, // 0CA9 ; UNKNOWN
5852 0x0CAA, // 0CAA..0CB3; KANNADA
5853 0x0CB4, // 0CB4 ; UNKNOWN
5854 0x0CB5, // 0CB5..0CB9; KANNADA
5855 0x0CBA, // 0CBA..0CBB; UNKNOWN
5856 0x0CBC, // 0CBC..0CC4; KANNADA
5857 0x0CC5, // 0CC5 ; UNKNOWN
5858 0x0CC6, // 0CC6..0CC8; KANNADA
5859 0x0CC9, // 0CC9 ; UNKNOWN
5860 0x0CCA, // 0CCA..0CCD; KANNADA
5861 0x0CCE, // 0CCE..0CD4; UNKNOWN
5862 0x0CD5, // 0CD5..0CD6; KANNADA
5863 0x0CD7, // 0CD7..0CDC; UNKNOWN
5864 0x0CDD, // 0CDD..0CDE; KANNADA
5865 0x0CDF, // 0CDF ; UNKNOWN
5866 0x0CE0, // 0CE0..0CE3; KANNADA
5867 0x0CE4, // 0CE4..0CE5; UNKNOWN
5868 0x0CE6, // 0CE6..0CEF; KANNADA
5869 0x0CF0, // 0CF0 ; UNKNOWN
5870 0x0CF1, // 0CF1..0CF3; KANNADA
5871 0x0CF4, // 0CF4..0CFF; UNKNOWN
5872 0x0D00, // 0D00..0D0C; MALAYALAM
5873 0x0D0D, // 0D0D ; UNKNOWN
5874 0x0D0E, // 0D0E..0D10; MALAYALAM
5875 0x0D11, // 0D11 ; UNKNOWN
5876 0x0D12, // 0D12..0D44; MALAYALAM
5877 0x0D45, // 0D45 ; UNKNOWN
5878 0x0D46, // 0D46..0D48; MALAYALAM
5879 0x0D49, // 0D49 ; UNKNOWN
5880 0x0D4A, // 0D4A..0D4F; MALAYALAM
5881 0x0D50, // 0D50..0D53; UNKNOWN
5882 0x0D54, // 0D54..0D63; MALAYALAM
5883 0x0D64, // 0D64..0D65; UNKNOWN
5884 0x0D66, // 0D66..0D7F; MALAYALAM
5885 0x0D80, // 0D80 ; UNKNOWN
5886 0x0D81, // 0D81..0D83; SINHALA
5887 0x0D84, // 0D84 ; UNKNOWN
5888 0x0D85, // 0D85..0D96; SINHALA
5889 0x0D97, // 0D97..0D99; UNKNOWN
5890 0x0D9A, // 0D9A..0DB1; SINHALA
5891 0x0DB2, // 0DB2 ; UNKNOWN
5892 0x0DB3, // 0DB3..0DBB; SINHALA
5893 0x0DBC, // 0DBC ; UNKNOWN
5894 0x0DBD, // 0DBD ; SINHALA
5895 0x0DBE, // 0DBE..0DBF; UNKNOWN
5896 0x0DC0, // 0DC0..0DC6; SINHALA
5897 0x0DC7, // 0DC7..0DC9; UNKNOWN
5898 0x0DCA, // 0DCA ; SINHALA
5899 0x0DCB, // 0DCB..0DCE; UNKNOWN
5900 0x0DCF, // 0DCF..0DD4; SINHALA
5901 0x0DD5, // 0DD5 ; UNKNOWN
5902 0x0DD6, // 0DD6 ; SINHALA
5903 0x0DD7, // 0DD7 ; UNKNOWN
5904 0x0DD8, // 0DD8..0DDF; SINHALA
5905 0x0DE0, // 0DE0..0DE5; UNKNOWN
5906 0x0DE6, // 0DE6..0DEF; SINHALA
5907 0x0DF0, // 0DF0..0DF1; UNKNOWN
5908 0x0DF2, // 0DF2..0DF4; SINHALA
5909 0x0DF5, // 0DF5..0E00; UNKNOWN
5910 0x0E01, // 0E01..0E3A; THAI
5911 0x0E3B, // 0E3B..0E3E; UNKNOWN
5912 0x0E3F, // 0E3F ; COMMON
5913 0x0E40, // 0E40..0E5B; THAI
5914 0x0E5C, // 0E5C..0E80; UNKNOWN
5915 0x0E81, // 0E81..0E82; LAO
5916 0x0E83, // 0E83 ; UNKNOWN
5917 0x0E84, // 0E84 ; LAO
5918 0x0E85, // 0E85 ; UNKNOWN
5919 0x0E86, // 0E86..0E8A; LAO
5920 0x0E8B, // 0E8B ; UNKNOWN
5921 0x0E8C, // 0E8C..0EA3; LAO
5922 0x0EA4, // 0EA4 ; UNKNOWN
5923 0x0EA5, // 0EA5 ; LAO
5924 0x0EA6, // 0EA6 ; UNKNOWN
5925 0x0EA7, // 0EA7..0EBD; LAO
5926 0x0EBE, // 0EBE..0EBF; UNKNOWN
5927 0x0EC0, // 0EC0..0EC4; LAO
5928 0x0EC5, // 0EC5 ; UNKNOWN
5929 0x0EC6, // 0EC6 ; LAO
5930 0x0EC7, // 0EC7 ; UNKNOWN
5931 0x0EC8, // 0EC8..0ECE; LAO
5932 0x0ECF, // 0ECF ; UNKNOWN
5933 0x0ED0, // 0ED0..0ED9; LAO
5934 0x0EDA, // 0EDA..0EDB; UNKNOWN
5935 0x0EDC, // 0EDC..0EDF; LAO
5936 0x0EE0, // 0EE0..0EFF; UNKNOWN
5937 0x0F00, // 0F00..0F47; TIBETAN
5938 0x0F48, // 0F48 ; UNKNOWN
5939 0x0F49, // 0F49..0F6C; TIBETAN
5940 0x0F6D, // 0F6D..0F70; UNKNOWN
5941 0x0F71, // 0F71..0F97; TIBETAN
5942 0x0F98, // 0F98 ; UNKNOWN
5943 0x0F99, // 0F99..0FBC; TIBETAN
5944 0x0FBD, // 0FBD ; UNKNOWN
5945 0x0FBE, // 0FBE..0FCC; TIBETAN
5946 0x0FCD, // 0FCD ; UNKNOWN
5947 0x0FCE, // 0FCE..0FD4; TIBETAN
5948 0x0FD5, // 0FD5..0FD8; COMMON
5949 0x0FD9, // 0FD9..0FDA; TIBETAN
5950 0x0FDB, // 0FDB..0FFF; UNKNOWN
5951 0x1000, // 1000..109F; MYANMAR
5952 0x10A0, // 10A0..10C5; GEORGIAN
5953 0x10C6, // 10C6 ; UNKNOWN
5954 0x10C7, // 10C7 ; GEORGIAN
5955 0x10C8, // 10C8..10CC; UNKNOWN
5956 0x10CD, // 10CD ; GEORGIAN
5957 0x10CE, // 10CE..10CF; UNKNOWN
5958 0x10D0, // 10D0..10FA; GEORGIAN
5959 0x10FB, // 10FB ; COMMON
5960 0x10FC, // 10FC..10FF; GEORGIAN
5961 0x1100, // 1100..11FF; HANGUL
5962 0x1200, // 1200..1248; ETHIOPIC
5963 0x1249, // 1249 ; UNKNOWN
5964 0x124A, // 124A..124D; ETHIOPIC
5965 0x124E, // 124E..124F; UNKNOWN
5966 0x1250, // 1250..1256; ETHIOPIC
5967 0x1257, // 1257 ; UNKNOWN
5968 0x1258, // 1258 ; ETHIOPIC
5969 0x1259, // 1259 ; UNKNOWN
5970 0x125A, // 125A..125D; ETHIOPIC
5971 0x125E, // 125E..125F; UNKNOWN
5972 0x1260, // 1260..1288; ETHIOPIC
5973 0x1289, // 1289 ; UNKNOWN
5974 0x128A, // 128A..128D; ETHIOPIC
5975 0x128E, // 128E..128F; UNKNOWN
5976 0x1290, // 1290..12B0; ETHIOPIC
5977 0x12B1, // 12B1 ; UNKNOWN
5978 0x12B2, // 12B2..12B5; ETHIOPIC
5979 0x12B6, // 12B6..12B7; UNKNOWN
5980 0x12B8, // 12B8..12BE; ETHIOPIC
5981 0x12BF, // 12BF ; UNKNOWN
5982 0x12C0, // 12C0 ; ETHIOPIC
5983 0x12C1, // 12C1 ; UNKNOWN
5984 0x12C2, // 12C2..12C5; ETHIOPIC
5985 0x12C6, // 12C6..12C7; UNKNOWN
5986 0x12C8, // 12C8..12D6; ETHIOPIC
5987 0x12D7, // 12D7 ; UNKNOWN
5988 0x12D8, // 12D8..1310; ETHIOPIC
5989 0x1311, // 1311 ; UNKNOWN
5990 0x1312, // 1312..1315; ETHIOPIC
5991 0x1316, // 1316..1317; UNKNOWN
5992 0x1318, // 1318..135A; ETHIOPIC
5993 0x135B, // 135B..135C; UNKNOWN
5994 0x135D, // 135D..137C; ETHIOPIC
5995 0x137D, // 137D..137F; UNKNOWN
5996 0x1380, // 1380..1399; ETHIOPIC
5997 0x139A, // 139A..139F; UNKNOWN
5998 0x13A0, // 13A0..13F5; CHEROKEE
5999 0x13F6, // 13F6..13F7; UNKNOWN
6000 0x13F8, // 13F8..13FD; CHEROKEE
6001 0x13FE, // 13FE..13FF; UNKNOWN
6002 0x1400, // 1400..167F; CANADIAN_ABORIGINAL
6003 0x1680, // 1680..169C; OGHAM
6004 0x169D, // 169D..169F; UNKNOWN
6005 0x16A0, // 16A0..16EA; RUNIC
6006 0x16EB, // 16EB..16ED; COMMON
6007 0x16EE, // 16EE..16F8; RUNIC
6008 0x16F9, // 16F9..16FF; UNKNOWN
6009 0x1700, // 1700..1715; TAGALOG
6010 0x1716, // 1716..171E; UNKNOWN
6011 0x171F, // 171F ; TAGALOG
6012 0x1720, // 1720..1734; HANUNOO
6013 0x1735, // 1735..1736; COMMON
6014 0x1737, // 1737..173F; UNKNOWN
6015 0x1740, // 1740..1753; BUHID
6016 0x1754, // 1754..175F; UNKNOWN
6017 0x1760, // 1760..176C; TAGBANWA
6018 0x176D, // 176D ; UNKNOWN
6019 0x176E, // 176E..1770; TAGBANWA
6020 0x1771, // 1771 ; UNKNOWN
6021 0x1772, // 1772..1773; TAGBANWA
6022 0x1774, // 1774..177F; UNKNOWN
6023 0x1780, // 1780..17DD; KHMER
6024 0x17DE, // 17DE..17DF; UNKNOWN
6025 0x17E0, // 17E0..17E9; KHMER
6026 0x17EA, // 17EA..17EF; UNKNOWN
6027 0x17F0, // 17F0..17F9; KHMER
6028 0x17FA, // 17FA..17FF; UNKNOWN
6029 0x1800, // 1800..1801; MONGOLIAN
6030 0x1802, // 1802..1803; COMMON
6031 0x1804, // 1804 ; MONGOLIAN
6032 0x1805, // 1805 ; COMMON
6033 0x1806, // 1806..1819; MONGOLIAN
6034 0x181A, // 181A..181F; UNKNOWN
6035 0x1820, // 1820..1878; MONGOLIAN
6036 0x1879, // 1879..187F; UNKNOWN
6037 0x1880, // 1880..18AA; MONGOLIAN
6038 0x18AB, // 18AB..18AF; UNKNOWN
6039 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL
6040 0x18F6, // 18F6..18FF; UNKNOWN
6041 0x1900, // 1900..191E; LIMBU
6042 0x191F, // 191F ; UNKNOWN
6043 0x1920, // 1920..192B; LIMBU
6044 0x192C, // 192C..192F; UNKNOWN
6045 0x1930, // 1930..193B; LIMBU
6046 0x193C, // 193C..193F; UNKNOWN
6047 0x1940, // 1940 ; LIMBU
6048 0x1941, // 1941..1943; UNKNOWN
6049 0x1944, // 1944..194F; LIMBU
6050 0x1950, // 1950..196D; TAI_LE
6051 0x196E, // 196E..196F; UNKNOWN
6052 0x1970, // 1970..1974; TAI_LE
6053 0x1975, // 1975..197F; UNKNOWN
6054 0x1980, // 1980..19AB; NEW_TAI_LUE
6055 0x19AC, // 19AC..19AF; UNKNOWN
6056 0x19B0, // 19B0..19C9; NEW_TAI_LUE
6057 0x19CA, // 19CA..19CF; UNKNOWN
6058 0x19D0, // 19D0..19DA; NEW_TAI_LUE
6059 0x19DB, // 19DB..19DD; UNKNOWN
6060 0x19DE, // 19DE..19DF; NEW_TAI_LUE
6061 0x19E0, // 19E0..19FF; KHMER
6062 0x1A00, // 1A00..1A1B; BUGINESE
6063 0x1A1C, // 1A1C..1A1D; UNKNOWN
6064 0x1A1E, // 1A1E..1A1F; BUGINESE
6065 0x1A20, // 1A20..1A5E; TAI_THAM
6066 0x1A5F, // 1A5F ; UNKNOWN
6067 0x1A60, // 1A60..1A7C; TAI_THAM
6068 0x1A7D, // 1A7D..1A7E; UNKNOWN
6069 0x1A7F, // 1A7F..1A89; TAI_THAM
6070 0x1A8A, // 1A8A..1A8F; UNKNOWN
6071 0x1A90, // 1A90..1A99; TAI_THAM
6072 0x1A9A, // 1A9A..1A9F; UNKNOWN
6073 0x1AA0, // 1AA0..1AAD; TAI_THAM
6074 0x1AAE, // 1AAE..1AAF; UNKNOWN
6075 0x1AB0, // 1AB0..1ACE; INHERITED
6076 0x1ACF, // 1ACF..1AFF; UNKNOWN
6077 0x1B00, // 1B00..1B4C; BALINESE
6078 0x1B4D, // 1B4D ; UNKNOWN
6079 0x1B4E, // 1B4E..1B7F; BALINESE
6080 0x1B80, // 1B80..1BBF; SUNDANESE
6081 0x1BC0, // 1BC0..1BF3; BATAK
6082 0x1BF4, // 1BF4..1BFB; UNKNOWN
6083 0x1BFC, // 1BFC..1BFF; BATAK
6084 0x1C00, // 1C00..1C37; LEPCHA
6085 0x1C38, // 1C38..1C3A; UNKNOWN
6086 0x1C3B, // 1C3B..1C49; LEPCHA
6087 0x1C4A, // 1C4A..1C4C; UNKNOWN
6088 0x1C4D, // 1C4D..1C4F; LEPCHA
6089 0x1C50, // 1C50..1C7F; OL_CHIKI
6090 0x1C80, // 1C80..1C8A; CYRILLIC
6091 0x1C8B, // 1C8B..1C8F; UNKNOWN
6092 0x1C90, // 1C90..1CBA; GEORGIAN
6093 0x1CBB, // 1CBB..1CBC; UNKNOWN
6094 0x1CBD, // 1CBD..1CBF; GEORGIAN
6095 0x1CC0, // 1CC0..1CC7; SUNDANESE
6096 0x1CC8, // 1CC8..1CCF; UNKNOWN
6097 0x1CD0, // 1CD0..1CD2; INHERITED
6098 0x1CD3, // 1CD3 ; COMMON
6099 0x1CD4, // 1CD4..1CE0; INHERITED
6100 0x1CE1, // 1CE1 ; COMMON
6101 0x1CE2, // 1CE2..1CE8; INHERITED
6102 0x1CE9, // 1CE9..1CEC; COMMON
6103 0x1CED, // 1CED ; INHERITED
6104 0x1CEE, // 1CEE..1CF3; COMMON
6105 0x1CF4, // 1CF4 ; INHERITED
6106 0x1CF5, // 1CF5..1CF7; COMMON
6107 0x1CF8, // 1CF8..1CF9; INHERITED
6108 0x1CFA, // 1CFA ; COMMON
6109 0x1CFB, // 1CFB..1CFF; UNKNOWN
6110 0x1D00, // 1D00..1D25; LATIN
6111 0x1D26, // 1D26..1D2A; GREEK
6112 0x1D2B, // 1D2B ; CYRILLIC
6113 0x1D2C, // 1D2C..1D5C; LATIN
6114 0x1D5D, // 1D5D..1D61; GREEK
6115 0x1D62, // 1D62..1D65; LATIN
6116 0x1D66, // 1D66..1D6A; GREEK
6117 0x1D6B, // 1D6B..1D77; LATIN
6118 0x1D78, // 1D78 ; CYRILLIC
6119 0x1D79, // 1D79..1DBE; LATIN
6120 0x1DBF, // 1DBF ; GREEK
6121 0x1DC0, // 1DC0..1DFF; INHERITED
6122 0x1E00, // 1E00..1EFF; LATIN
6123 0x1F00, // 1F00..1F15; GREEK
6124 0x1F16, // 1F16..1F17; UNKNOWN
6125 0x1F18, // 1F18..1F1D; GREEK
6126 0x1F1E, // 1F1E..1F1F; UNKNOWN
6127 0x1F20, // 1F20..1F45; GREEK
6128 0x1F46, // 1F46..1F47; UNKNOWN
6129 0x1F48, // 1F48..1F4D; GREEK
6130 0x1F4E, // 1F4E..1F4F; UNKNOWN
6131 0x1F50, // 1F50..1F57; GREEK
6132 0x1F58, // 1F58 ; UNKNOWN
6133 0x1F59, // 1F59 ; GREEK
6134 0x1F5A, // 1F5A ; UNKNOWN
6135 0x1F5B, // 1F5B ; GREEK
6136 0x1F5C, // 1F5C ; UNKNOWN
6137 0x1F5D, // 1F5D ; GREEK
6138 0x1F5E, // 1F5E ; UNKNOWN
6139 0x1F5F, // 1F5F..1F7D; GREEK
6140 0x1F7E, // 1F7E..1F7F; UNKNOWN
6141 0x1F80, // 1F80..1FB4; GREEK
6142 0x1FB5, // 1FB5 ; UNKNOWN
6143 0x1FB6, // 1FB6..1FC4; GREEK
6144 0x1FC5, // 1FC5 ; UNKNOWN
6145 0x1FC6, // 1FC6..1FD3; GREEK
6146 0x1FD4, // 1FD4..1FD5; UNKNOWN
6147 0x1FD6, // 1FD6..1FDB; GREEK
6148 0x1FDC, // 1FDC ; UNKNOWN
6149 0x1FDD, // 1FDD..1FEF; GREEK
6150 0x1FF0, // 1FF0..1FF1; UNKNOWN
6151 0x1FF2, // 1FF2..1FF4; GREEK
6152 0x1FF5, // 1FF5 ; UNKNOWN
6153 0x1FF6, // 1FF6..1FFE; GREEK
6154 0x1FFF, // 1FFF ; UNKNOWN
6155 0x2000, // 2000..200B; COMMON
6156 0x200C, // 200C..200D; INHERITED
6157 0x200E, // 200E..2064; COMMON
6158 0x2065, // 2065 ; UNKNOWN
6159 0x2066, // 2066..2070; COMMON
6160 0x2071, // 2071 ; LATIN
6161 0x2072, // 2072..2073; UNKNOWN
6162 0x2074, // 2074..207E; COMMON
6163 0x207F, // 207F ; LATIN
6164 0x2080, // 2080..208E; COMMON
6165 0x208F, // 208F ; UNKNOWN
6166 0x2090, // 2090..209C; LATIN
6167 0x209D, // 209D..209F; UNKNOWN
6168 0x20A0, // 20A0..20C0; COMMON
6169 0x20C1, // 20C1..20CF; UNKNOWN
6170 0x20D0, // 20D0..20F0; INHERITED
6171 0x20F1, // 20F1..20FF; UNKNOWN
6172 0x2100, // 2100..2125; COMMON
6173 0x2126, // 2126 ; GREEK
6174 0x2127, // 2127..2129; COMMON
6175 0x212A, // 212A..212B; LATIN
6176 0x212C, // 212C..2131; COMMON
6177 0x2132, // 2132 ; LATIN
6178 0x2133, // 2133..214D; COMMON
6179 0x214E, // 214E ; LATIN
6180 0x214F, // 214F..215F; COMMON
6181 0x2160, // 2160..2188; LATIN
6182 0x2189, // 2189..218B; COMMON
6183 0x218C, // 218C..218F; UNKNOWN
6184 0x2190, // 2190..2429; COMMON
6185 0x242A, // 242A..243F; UNKNOWN
6186 0x2440, // 2440..244A; COMMON
6187 0x244B, // 244B..245F; UNKNOWN
6188 0x2460, // 2460..27FF; COMMON
6189 0x2800, // 2800..28FF; BRAILLE
6190 0x2900, // 2900..2B73; COMMON
6191 0x2B74, // 2B74..2B75; UNKNOWN
6192 0x2B76, // 2B76..2B95; COMMON
6193 0x2B96, // 2B96 ; UNKNOWN
6194 0x2B97, // 2B97..2BFF; COMMON
6195 0x2C00, // 2C00..2C5F; GLAGOLITIC
6196 0x2C60, // 2C60..2C7F; LATIN
6197 0x2C80, // 2C80..2CF3; COPTIC
6198 0x2CF4, // 2CF4..2CF8; UNKNOWN
6199 0x2CF9, // 2CF9..2CFF; COPTIC
6200 0x2D00, // 2D00..2D25; GEORGIAN
6201 0x2D26, // 2D26 ; UNKNOWN
6202 0x2D27, // 2D27 ; GEORGIAN
6203 0x2D28, // 2D28..2D2C; UNKNOWN
6204 0x2D2D, // 2D2D ; GEORGIAN
6205 0x2D2E, // 2D2E..2D2F; UNKNOWN
6206 0x2D30, // 2D30..2D67; TIFINAGH
6207 0x2D68, // 2D68..2D6E; UNKNOWN
6208 0x2D6F, // 2D6F..2D70; TIFINAGH
6209 0x2D71, // 2D71..2D7E; UNKNOWN
6210 0x2D7F, // 2D7F ; TIFINAGH
6211 0x2D80, // 2D80..2D96; ETHIOPIC
6212 0x2D97, // 2D97..2D9F; UNKNOWN
6213 0x2DA0, // 2DA0..2DA6; ETHIOPIC
6214 0x2DA7, // 2DA7 ; UNKNOWN
6215 0x2DA8, // 2DA8..2DAE; ETHIOPIC
6216 0x2DAF, // 2DAF ; UNKNOWN
6217 0x2DB0, // 2DB0..2DB6; ETHIOPIC
6218 0x2DB7, // 2DB7 ; UNKNOWN
6219 0x2DB8, // 2DB8..2DBE; ETHIOPIC
6220 0x2DBF, // 2DBF ; UNKNOWN
6221 0x2DC0, // 2DC0..2DC6; ETHIOPIC
6222 0x2DC7, // 2DC7 ; UNKNOWN
6223 0x2DC8, // 2DC8..2DCE; ETHIOPIC
6224 0x2DCF, // 2DCF ; UNKNOWN
6225 0x2DD0, // 2DD0..2DD6; ETHIOPIC
6226 0x2DD7, // 2DD7 ; UNKNOWN
6227 0x2DD8, // 2DD8..2DDE; ETHIOPIC
6228 0x2DDF, // 2DDF ; UNKNOWN
6229 0x2DE0, // 2DE0..2DFF; CYRILLIC
6230 0x2E00, // 2E00..2E5D; COMMON
6231 0x2E5E, // 2E5E..2E7F; UNKNOWN
6232 0x2E80, // 2E80..2E99; HAN
6233 0x2E9A, // 2E9A ; UNKNOWN
6234 0x2E9B, // 2E9B..2EF3; HAN
6235 0x2EF4, // 2EF4..2EFF; UNKNOWN
6236 0x2F00, // 2F00..2FD5; HAN
6237 0x2FD6, // 2FD6..2FEF; UNKNOWN
6238 0x2FF0, // 2FF0..3004; COMMON
6239 0x3005, // 3005 ; HAN
6240 0x3006, // 3006 ; COMMON
6241 0x3007, // 3007 ; HAN
6242 0x3008, // 3008..3020; COMMON
6243 0x3021, // 3021..3029; HAN
6244 0x302A, // 302A..302D; INHERITED
6245 0x302E, // 302E..302F; HANGUL
6246 0x3030, // 3030..3037; COMMON
6247 0x3038, // 3038..303B; HAN
6248 0x303C, // 303C..303F; COMMON
6249 0x3040, // 3040 ; UNKNOWN
6250 0x3041, // 3041..3096; HIRAGANA
6251 0x3097, // 3097..3098; UNKNOWN
6252 0x3099, // 3099..309A; INHERITED
6253 0x309B, // 309B..309C; COMMON
6254 0x309D, // 309D..309F; HIRAGANA
6255 0x30A0, // 30A0 ; COMMON
6256 0x30A1, // 30A1..30FA; KATAKANA
6257 0x30FB, // 30FB..30FC; COMMON
6258 0x30FD, // 30FD..30FF; KATAKANA
6259 0x3100, // 3100..3104; UNKNOWN
6260 0x3105, // 3105..312F; BOPOMOFO
6261 0x3130, // 3130 ; UNKNOWN
6262 0x3131, // 3131..318E; HANGUL
6263 0x318F, // 318F ; UNKNOWN
6264 0x3190, // 3190..319F; COMMON
6265 0x31A0, // 31A0..31BF; BOPOMOFO
6266 0x31C0, // 31C0..31E5; COMMON
6267 0x31E6, // 31E6..31EE; UNKNOWN
6268 0x31EF, // 31EF ; COMMON
6269 0x31F0, // 31F0..31FF; KATAKANA
6270 0x3200, // 3200..321E; HANGUL
6271 0x321F, // 321F ; UNKNOWN
6272 0x3220, // 3220..325F; COMMON
6273 0x3260, // 3260..327E; HANGUL
6274 0x327F, // 327F..32CF; COMMON
6275 0x32D0, // 32D0..32FE; KATAKANA
6276 0x32FF, // 32FF ; COMMON
6277 0x3300, // 3300..3357; KATAKANA
6278 0x3358, // 3358..33FF; COMMON
6279 0x3400, // 3400..4DBF; HAN
6280 0x4DC0, // 4DC0..4DFF; COMMON
6281 0x4E00, // 4E00..9FFF; HAN
6282 0xA000, // A000..A48C; YI
6283 0xA48D, // A48D..A48F; UNKNOWN
6284 0xA490, // A490..A4C6; YI
6285 0xA4C7, // A4C7..A4CF; UNKNOWN
6286 0xA4D0, // A4D0..A4FF; LISU
6287 0xA500, // A500..A62B; VAI
6288 0xA62C, // A62C..A63F; UNKNOWN
6289 0xA640, // A640..A69F; CYRILLIC
6290 0xA6A0, // A6A0..A6F7; BAMUM
6291 0xA6F8, // A6F8..A6FF; UNKNOWN
6292 0xA700, // A700..A721; COMMON
6293 0xA722, // A722..A787; LATIN
6294 0xA788, // A788..A78A; COMMON
6295 0xA78B, // A78B..A7CD; LATIN
6296 0xA7CE, // A7CE..A7CF; UNKNOWN
6297 0xA7D0, // A7D0..A7D1; LATIN
6298 0xA7D2, // A7D2 ; UNKNOWN
6299 0xA7D3, // A7D3 ; LATIN
6300 0xA7D4, // A7D4 ; UNKNOWN
6301 0xA7D5, // A7D5..A7DC; LATIN
6302 0xA7DD, // A7DD..A7F1; UNKNOWN
6303 0xA7F2, // A7F2..A7FF; LATIN
6304 0xA800, // A800..A82C; SYLOTI_NAGRI
6305 0xA82D, // A82D..A82F; UNKNOWN
6306 0xA830, // A830..A839; COMMON
6307 0xA83A, // A83A..A83F; UNKNOWN
6308 0xA840, // A840..A877; PHAGS_PA
6309 0xA878, // A878..A87F; UNKNOWN
6310 0xA880, // A880..A8C5; SAURASHTRA
6311 0xA8C6, // A8C6..A8CD; UNKNOWN
6312 0xA8CE, // A8CE..A8D9; SAURASHTRA
6313 0xA8DA, // A8DA..A8DF; UNKNOWN
6314 0xA8E0, // A8E0..A8FF; DEVANAGARI
6315 0xA900, // A900..A92D; KAYAH_LI
6316 0xA92E, // A92E ; COMMON
6317 0xA92F, // A92F ; KAYAH_LI
6318 0xA930, // A930..A953; REJANG
6319 0xA954, // A954..A95E; UNKNOWN
6320 0xA95F, // A95F ; REJANG
6321 0xA960, // A960..A97C; HANGUL
6322 0xA97D, // A97D..A97F; UNKNOWN
6323 0xA980, // A980..A9CD; JAVANESE
6324 0xA9CE, // A9CE ; UNKNOWN
6325 0xA9CF, // A9CF ; COMMON
6326 0xA9D0, // A9D0..A9D9; JAVANESE
6327 0xA9DA, // A9DA..A9DD; UNKNOWN
6328 0xA9DE, // A9DE..A9DF; JAVANESE
6329 0xA9E0, // A9E0..A9FE; MYANMAR
6330 0xA9FF, // A9FF ; UNKNOWN
6331 0xAA00, // AA00..AA36; CHAM
6332 0xAA37, // AA37..AA3F; UNKNOWN
6333 0xAA40, // AA40..AA4D; CHAM
6334 0xAA4E, // AA4E..AA4F; UNKNOWN
6335 0xAA50, // AA50..AA59; CHAM
6336 0xAA5A, // AA5A..AA5B; UNKNOWN
6337 0xAA5C, // AA5C..AA5F; CHAM
6338 0xAA60, // AA60..AA7F; MYANMAR
6339 0xAA80, // AA80..AAC2; TAI_VIET
6340 0xAAC3, // AAC3..AADA; UNKNOWN
6341 0xAADB, // AADB..AADF; TAI_VIET
6342 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK
6343 0xAAF7, // AAF7..AB00; UNKNOWN
6344 0xAB01, // AB01..AB06; ETHIOPIC
6345 0xAB07, // AB07..AB08; UNKNOWN
6346 0xAB09, // AB09..AB0E; ETHIOPIC
6347 0xAB0F, // AB0F..AB10; UNKNOWN
6348 0xAB11, // AB11..AB16; ETHIOPIC
6349 0xAB17, // AB17..AB1F; UNKNOWN
6350 0xAB20, // AB20..AB26; ETHIOPIC
6351 0xAB27, // AB27 ; UNKNOWN
6352 0xAB28, // AB28..AB2E; ETHIOPIC
6353 0xAB2F, // AB2F ; UNKNOWN
6354 0xAB30, // AB30..AB5A; LATIN
6355 0xAB5B, // AB5B ; COMMON
6356 0xAB5C, // AB5C..AB64; LATIN
6357 0xAB65, // AB65 ; GREEK
6358 0xAB66, // AB66..AB69; LATIN
6359 0xAB6A, // AB6A..AB6B; COMMON
6360 0xAB6C, // AB6C..AB6F; UNKNOWN
6361 0xAB70, // AB70..ABBF; CHEROKEE
6362 0xABC0, // ABC0..ABED; MEETEI_MAYEK
6363 0xABEE, // ABEE..ABEF; UNKNOWN
6364 0xABF0, // ABF0..ABF9; MEETEI_MAYEK
6365 0xABFA, // ABFA..ABFF; UNKNOWN
6366 0xAC00, // AC00..D7A3; HANGUL
6367 0xD7A4, // D7A4..D7AF; UNKNOWN
6368 0xD7B0, // D7B0..D7C6; HANGUL
6369 0xD7C7, // D7C7..D7CA; UNKNOWN
6370 0xD7CB, // D7CB..D7FB; HANGUL
6371 0xD7FC, // D7FC..F8FF; UNKNOWN
6372 0xF900, // F900..FA6D; HAN
6373 0xFA6E, // FA6E..FA6F; UNKNOWN
6374 0xFA70, // FA70..FAD9; HAN
6375 0xFADA, // FADA..FAFF; UNKNOWN
6376 0xFB00, // FB00..FB06; LATIN
6377 0xFB07, // FB07..FB12; UNKNOWN
6378 0xFB13, // FB13..FB17; ARMENIAN
6379 0xFB18, // FB18..FB1C; UNKNOWN
6380 0xFB1D, // FB1D..FB36; HEBREW
6381 0xFB37, // FB37 ; UNKNOWN
6382 0xFB38, // FB38..FB3C; HEBREW
6383 0xFB3D, // FB3D ; UNKNOWN
6384 0xFB3E, // FB3E ; HEBREW
6385 0xFB3F, // FB3F ; UNKNOWN
6386 0xFB40, // FB40..FB41; HEBREW
6387 0xFB42, // FB42 ; UNKNOWN
6388 0xFB43, // FB43..FB44; HEBREW
6389 0xFB45, // FB45 ; UNKNOWN
6390 0xFB46, // FB46..FB4F; HEBREW
6391 0xFB50, // FB50..FBC2; ARABIC
6392 0xFBC3, // FBC3..FBD2; UNKNOWN
6393 0xFBD3, // FBD3..FD3D; ARABIC
6394 0xFD3E, // FD3E..FD3F; COMMON
6395 0xFD40, // FD40..FD8F; ARABIC
6396 0xFD90, // FD90..FD91; UNKNOWN
6397 0xFD92, // FD92..FDC7; ARABIC
6398 0xFDC8, // FDC8..FDCE; UNKNOWN
6399 0xFDCF, // FDCF ; ARABIC
6400 0xFDD0, // FDD0..FDEF; UNKNOWN
6401 0xFDF0, // FDF0..FDFF; ARABIC
6402 0xFE00, // FE00..FE0F; INHERITED
6403 0xFE10, // FE10..FE19; COMMON
6404 0xFE1A, // FE1A..FE1F; UNKNOWN
6405 0xFE20, // FE20..FE2D; INHERITED
6406 0xFE2E, // FE2E..FE2F; CYRILLIC
6407 0xFE30, // FE30..FE52; COMMON
6408 0xFE53, // FE53 ; UNKNOWN
6409 0xFE54, // FE54..FE66; COMMON
6410 0xFE67, // FE67 ; UNKNOWN
6411 0xFE68, // FE68..FE6B; COMMON
6412 0xFE6C, // FE6C..FE6F; UNKNOWN
6413 0xFE70, // FE70..FE74; ARABIC
6414 0xFE75, // FE75 ; UNKNOWN
6415 0xFE76, // FE76..FEFC; ARABIC
6416 0xFEFD, // FEFD..FEFE; UNKNOWN
6417 0xFEFF, // FEFF ; COMMON
6418 0xFF00, // FF00 ; UNKNOWN
6419 0xFF01, // FF01..FF20; COMMON
6420 0xFF21, // FF21..FF3A; LATIN
6421 0xFF3B, // FF3B..FF40; COMMON
6422 0xFF41, // FF41..FF5A; LATIN
6423 0xFF5B, // FF5B..FF65; COMMON
6424 0xFF66, // FF66..FF6F; KATAKANA
6425 0xFF70, // FF70 ; COMMON
6426 0xFF71, // FF71..FF9D; KATAKANA
6427 0xFF9E, // FF9E..FF9F; COMMON
6428 0xFFA0, // FFA0..FFBE; HANGUL
6429 0xFFBF, // FFBF..FFC1; UNKNOWN
6430 0xFFC2, // FFC2..FFC7; HANGUL
6431 0xFFC8, // FFC8..FFC9; UNKNOWN
6432 0xFFCA, // FFCA..FFCF; HANGUL
6433 0xFFD0, // FFD0..FFD1; UNKNOWN
6434 0xFFD2, // FFD2..FFD7; HANGUL
6435 0xFFD8, // FFD8..FFD9; UNKNOWN
6436 0xFFDA, // FFDA..FFDC; HANGUL
6437 0xFFDD, // FFDD..FFDF; UNKNOWN
6438 0xFFE0, // FFE0..FFE6; COMMON
6439 0xFFE7, // FFE7 ; UNKNOWN
6440 0xFFE8, // FFE8..FFEE; COMMON
6441 0xFFEF, // FFEF..FFF8; UNKNOWN
6442 0xFFF9, // FFF9..FFFD; COMMON
6443 0xFFFE, // FFFE..FFFF; UNKNOWN
6444 0x10000, // 10000..1000B; LINEAR_B
6445 0x1000C, // 1000C ; UNKNOWN
6446 0x1000D, // 1000D..10026; LINEAR_B
6447 0x10027, // 10027 ; UNKNOWN
6448 0x10028, // 10028..1003A; LINEAR_B
6449 0x1003B, // 1003B ; UNKNOWN
6450 0x1003C, // 1003C..1003D; LINEAR_B
6451 0x1003E, // 1003E ; UNKNOWN
6452 0x1003F, // 1003F..1004D; LINEAR_B
6453 0x1004E, // 1004E..1004F; UNKNOWN
6454 0x10050, // 10050..1005D; LINEAR_B
6455 0x1005E, // 1005E..1007F; UNKNOWN
6456 0x10080, // 10080..100FA; LINEAR_B
6457 0x100FB, // 100FB..100FF; UNKNOWN
6458 0x10100, // 10100..10102; COMMON
6459 0x10103, // 10103..10106; UNKNOWN
6460 0x10107, // 10107..10133; COMMON
6461 0x10134, // 10134..10136; UNKNOWN
6462 0x10137, // 10137..1013F; COMMON
6463 0x10140, // 10140..1018E; GREEK
6464 0x1018F, // 1018F ; UNKNOWN
6465 0x10190, // 10190..1019C; COMMON
6466 0x1019D, // 1019D..1019F; UNKNOWN
6467 0x101A0, // 101A0 ; GREEK
6468 0x101A1, // 101A1..101CF; UNKNOWN
6469 0x101D0, // 101D0..101FC; COMMON
6470 0x101FD, // 101FD ; INHERITED
6471 0x101FE, // 101FE..1027F; UNKNOWN
6472 0x10280, // 10280..1029C; LYCIAN
6473 0x1029D, // 1029D..1029F; UNKNOWN
6474 0x102A0, // 102A0..102D0; CARIAN
6475 0x102D1, // 102D1..102DF; UNKNOWN
6476 0x102E0, // 102E0 ; INHERITED
6477 0x102E1, // 102E1..102FB; COMMON
6478 0x102FC, // 102FC..102FF; UNKNOWN
6479 0x10300, // 10300..10323; OLD_ITALIC
6480 0x10324, // 10324..1032C; UNKNOWN
6481 0x1032D, // 1032D..1032F; OLD_ITALIC
6482 0x10330, // 10330..1034A; GOTHIC
6483 0x1034B, // 1034B..1034F; UNKNOWN
6484 0x10350, // 10350..1037A; OLD_PERMIC
6485 0x1037B, // 1037B..1037F; UNKNOWN
6486 0x10380, // 10380..1039D; UGARITIC
6487 0x1039E, // 1039E ; UNKNOWN
6488 0x1039F, // 1039F ; UGARITIC
6489 0x103A0, // 103A0..103C3; OLD_PERSIAN
6490 0x103C4, // 103C4..103C7; UNKNOWN
6491 0x103C8, // 103C8..103D5; OLD_PERSIAN
6492 0x103D6, // 103D6..103FF; UNKNOWN
6493 0x10400, // 10400..1044F; DESERET
6494 0x10450, // 10450..1047F; SHAVIAN
6495 0x10480, // 10480..1049D; OSMANYA
6496 0x1049E, // 1049E..1049F; UNKNOWN
6497 0x104A0, // 104A0..104A9; OSMANYA
6498 0x104AA, // 104AA..104AF; UNKNOWN
6499 0x104B0, // 104B0..104D3; OSAGE
6500 0x104D4, // 104D4..104D7; UNKNOWN
6501 0x104D8, // 104D8..104FB; OSAGE
6502 0x104FC, // 104FC..104FF; UNKNOWN
6503 0x10500, // 10500..10527; ELBASAN
6504 0x10528, // 10528..1052F; UNKNOWN
6505 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN
6506 0x10564, // 10564..1056E; UNKNOWN
6507 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN
6508 0x10570, // 10570..1057A; VITHKUQI
6509 0x1057B, // 1057B ; UNKNOWN
6510 0x1057C, // 1057C..1058A; VITHKUQI
6511 0x1058B, // 1058B ; UNKNOWN
6512 0x1058C, // 1058C..10592; VITHKUQI
6513 0x10593, // 10593 ; UNKNOWN
6514 0x10594, // 10594..10595; VITHKUQI
6515 0x10596, // 10596 ; UNKNOWN
6516 0x10597, // 10597..105A1; VITHKUQI
6517 0x105A2, // 105A2 ; UNKNOWN
6518 0x105A3, // 105A3..105B1; VITHKUQI
6519 0x105B2, // 105B2 ; UNKNOWN
6520 0x105B3, // 105B3..105B9; VITHKUQI
6521 0x105BA, // 105BA ; UNKNOWN
6522 0x105BB, // 105BB..105BC; VITHKUQI
6523 0x105BD, // 105BD..105BF; UNKNOWN
6524 0x105C0, // 105C0..105F3; TODHRI
6525 0x105F4, // 105F4..105FF; UNKNOWN
6526 0x10600, // 10600..10736; LINEAR_A
6527 0x10737, // 10737..1073F; UNKNOWN
6528 0x10740, // 10740..10755; LINEAR_A
6529 0x10756, // 10756..1075F; UNKNOWN
6530 0x10760, // 10760..10767; LINEAR_A
6531 0x10768, // 10768..1077F; UNKNOWN
6532 0x10780, // 10780..10785; LATIN
6533 0x10786, // 10786 ; UNKNOWN
6534 0x10787, // 10787..107B0; LATIN
6535 0x107B1, // 107B1 ; UNKNOWN
6536 0x107B2, // 107B2..107BA; LATIN
6537 0x107BB, // 107BB..107FF; UNKNOWN
6538 0x10800, // 10800..10805; CYPRIOT
6539 0x10806, // 10806..10807; UNKNOWN
6540 0x10808, // 10808 ; CYPRIOT
6541 0x10809, // 10809 ; UNKNOWN
6542 0x1080A, // 1080A..10835; CYPRIOT
6543 0x10836, // 10836 ; UNKNOWN
6544 0x10837, // 10837..10838; CYPRIOT
6545 0x10839, // 10839..1083B; UNKNOWN
6546 0x1083C, // 1083C ; CYPRIOT
6547 0x1083D, // 1083D..1083E; UNKNOWN
6548 0x1083F, // 1083F ; CYPRIOT
6549 0x10840, // 10840..10855; IMPERIAL_ARAMAIC
6550 0x10856, // 10856 ; UNKNOWN
6551 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC
6552 0x10860, // 10860..1087F; PALMYRENE
6553 0x10880, // 10880..1089E; NABATAEAN
6554 0x1089F, // 1089F..108A6; UNKNOWN
6555 0x108A7, // 108A7..108AF; NABATAEAN
6556 0x108B0, // 108B0..108DF; UNKNOWN
6557 0x108E0, // 108E0..108F2; HATRAN
6558 0x108F3, // 108F3 ; UNKNOWN
6559 0x108F4, // 108F4..108F5; HATRAN
6560 0x108F6, // 108F6..108FA; UNKNOWN
6561 0x108FB, // 108FB..108FF; HATRAN
6562 0x10900, // 10900..1091B; PHOENICIAN
6563 0x1091C, // 1091C..1091E; UNKNOWN
6564 0x1091F, // 1091F ; PHOENICIAN
6565 0x10920, // 10920..10939; LYDIAN
6566 0x1093A, // 1093A..1093E; UNKNOWN
6567 0x1093F, // 1093F ; LYDIAN
6568 0x10940, // 10940..1097F; UNKNOWN
6569 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS
6570 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE
6571 0x109B8, // 109B8..109BB; UNKNOWN
6572 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE
6573 0x109D0, // 109D0..109D1; UNKNOWN
6574 0x109D2, // 109D2..109FF; MEROITIC_CURSIVE
6575 0x10A00, // 10A00..10A03; KHAROSHTHI
6576 0x10A04, // 10A04 ; UNKNOWN
6577 0x10A05, // 10A05..10A06; KHAROSHTHI
6578 0x10A07, // 10A07..10A0B; UNKNOWN
6579 0x10A0C, // 10A0C..10A13; KHAROSHTHI
6580 0x10A14, // 10A14 ; UNKNOWN
6581 0x10A15, // 10A15..10A17; KHAROSHTHI
6582 0x10A18, // 10A18 ; UNKNOWN
6583 0x10A19, // 10A19..10A35; KHAROSHTHI
6584 0x10A36, // 10A36..10A37; UNKNOWN
6585 0x10A38, // 10A38..10A3A; KHAROSHTHI
6586 0x10A3B, // 10A3B..10A3E; UNKNOWN
6587 0x10A3F, // 10A3F..10A48; KHAROSHTHI
6588 0x10A49, // 10A49..10A4F; UNKNOWN
6589 0x10A50, // 10A50..10A58; KHAROSHTHI
6590 0x10A59, // 10A59..10A5F; UNKNOWN
6591 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN
6592 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN
6593 0x10AA0, // 10AA0..10ABF; UNKNOWN
6594 0x10AC0, // 10AC0..10AE6; MANICHAEAN
6595 0x10AE7, // 10AE7..10AEA; UNKNOWN
6596 0x10AEB, // 10AEB..10AF6; MANICHAEAN
6597 0x10AF7, // 10AF7..10AFF; UNKNOWN
6598 0x10B00, // 10B00..10B35; AVESTAN
6599 0x10B36, // 10B36..10B38; UNKNOWN
6600 0x10B39, // 10B39..10B3F; AVESTAN
6601 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN
6602 0x10B56, // 10B56..10B57; UNKNOWN
6603 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN
6604 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI
6605 0x10B73, // 10B73..10B77; UNKNOWN
6606 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI
6607 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI
6608 0x10B92, // 10B92..10B98; UNKNOWN
6609 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI
6610 0x10B9D, // 10B9D..10BA8; UNKNOWN
6611 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI
6612 0x10BB0, // 10BB0..10BFF; UNKNOWN
6613 0x10C00, // 10C00..10C48; OLD_TURKIC
6614 0x10C49, // 10C49..10C7F; UNKNOWN
6615 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN
6616 0x10CB3, // 10CB3..10CBF; UNKNOWN
6617 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN
6618 0x10CF3, // 10CF3..10CF9; UNKNOWN
6619 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN
6620 0x10D00, // 10D00..10D27; HANIFI_ROHINGYA
6621 0x10D28, // 10D28..10D2F; UNKNOWN
6622 0x10D30, // 10D30..10D39; HANIFI_ROHINGYA
6623 0x10D3A, // 10D3A..10D3F; UNKNOWN
6624 0x10D40, // 10D40..10D65; GARAY
6625 0x10D66, // 10D66..10D68; UNKNOWN
6626 0x10D69, // 10D69..10D85; GARAY
6627 0x10D86, // 10D86..10D8D; UNKNOWN
6628 0x10D8E, // 10D8E..10D8F; GARAY
6629 0x10D90, // 10D90..10E5F; UNKNOWN
6630 0x10E60, // 10E60..10E7E; ARABIC
6631 0x10E7F, // 10E7F ; UNKNOWN
6632 0x10E80, // 10E80..10EA9; YEZIDI
6633 0x10EAA, // 10EAA ; UNKNOWN
6634 0x10EAB, // 10EAB..10EAD; YEZIDI
6635 0x10EAE, // 10EAE..10EAF; UNKNOWN
6636 0x10EB0, // 10EB0..10EB1; YEZIDI
6637 0x10EB2, // 10EB2..10EC1; UNKNOWN
6638 0x10EC2, // 10EC2..10EC4; ARABIC
6639 0x10EC5, // 10EC5..10EFB; UNKNOWN
6640 0x10EFC, // 10EFC..10EFF; ARABIC
6641 0x10F00, // 10F00..10F27; OLD_SOGDIAN
6642 0x10F28, // 10F28..10F2F; UNKNOWN
6643 0x10F30, // 10F30..10F59; SOGDIAN
6644 0x10F5A, // 10F5A..10F6F; UNKNOWN
6645 0x10F70, // 10F70..10F89; OLD_UYGHUR
6646 0x10F8A, // 10F8A..10FAF; UNKNOWN
6647 0x10FB0, // 10FB0..10FCB; CHORASMIAN
6648 0x10FCC, // 10FCC..10FDF; UNKNOWN
6649 0x10FE0, // 10FE0..10FF6; ELYMAIC
6650 0x10FF7, // 10FF7..10FFF; UNKNOWN
6651 0x11000, // 11000..1104D; BRAHMI
6652 0x1104E, // 1104E..11051; UNKNOWN
6653 0x11052, // 11052..11075; BRAHMI
6654 0x11076, // 11076..1107E; UNKNOWN
6655 0x1107F, // 1107F ; BRAHMI
6656 0x11080, // 11080..110C2; KAITHI
6657 0x110C3, // 110C3..110CC; UNKNOWN
6658 0x110CD, // 110CD ; KAITHI
6659 0x110CE, // 110CE..110CF; UNKNOWN
6660 0x110D0, // 110D0..110E8; SORA_SOMPENG
6661 0x110E9, // 110E9..110EF; UNKNOWN
6662 0x110F0, // 110F0..110F9; SORA_SOMPENG
6663 0x110FA, // 110FA..110FF; UNKNOWN
6664 0x11100, // 11100..11134; CHAKMA
6665 0x11135, // 11135 ; UNKNOWN
6666 0x11136, // 11136..11147; CHAKMA
6667 0x11148, // 11148..1114F; UNKNOWN
6668 0x11150, // 11150..11176; MAHAJANI
6669 0x11177, // 11177..1117F; UNKNOWN
6670 0x11180, // 11180..111DF; SHARADA
6671 0x111E0, // 111E0 ; UNKNOWN
6672 0x111E1, // 111E1..111F4; SINHALA
6673 0x111F5, // 111F5..111FF; UNKNOWN
6674 0x11200, // 11200..11211; KHOJKI
6675 0x11212, // 11212 ; UNKNOWN
6676 0x11213, // 11213..11241; KHOJKI
6677 0x11242, // 11242..1127F; UNKNOWN
6678 0x11280, // 11280..11286; MULTANI
6679 0x11287, // 11287 ; UNKNOWN
6680 0x11288, // 11288 ; MULTANI
6681 0x11289, // 11289 ; UNKNOWN
6682 0x1128A, // 1128A..1128D; MULTANI
6683 0x1128E, // 1128E ; UNKNOWN
6684 0x1128F, // 1128F..1129D; MULTANI
6685 0x1129E, // 1129E ; UNKNOWN
6686 0x1129F, // 1129F..112A9; MULTANI
6687 0x112AA, // 112AA..112AF; UNKNOWN
6688 0x112B0, // 112B0..112EA; KHUDAWADI
6689 0x112EB, // 112EB..112EF; UNKNOWN
6690 0x112F0, // 112F0..112F9; KHUDAWADI
6691 0x112FA, // 112FA..112FF; UNKNOWN
6692 0x11300, // 11300..11303; GRANTHA
6693 0x11304, // 11304 ; UNKNOWN
6694 0x11305, // 11305..1130C; GRANTHA
6695 0x1130D, // 1130D..1130E; UNKNOWN
6696 0x1130F, // 1130F..11310; GRANTHA
6697 0x11311, // 11311..11312; UNKNOWN
6698 0x11313, // 11313..11328; GRANTHA
6699 0x11329, // 11329 ; UNKNOWN
6700 0x1132A, // 1132A..11330; GRANTHA
6701 0x11331, // 11331 ; UNKNOWN
6702 0x11332, // 11332..11333; GRANTHA
6703 0x11334, // 11334 ; UNKNOWN
6704 0x11335, // 11335..11339; GRANTHA
6705 0x1133A, // 1133A ; UNKNOWN
6706 0x1133B, // 1133B ; INHERITED
6707 0x1133C, // 1133C..11344; GRANTHA
6708 0x11345, // 11345..11346; UNKNOWN
6709 0x11347, // 11347..11348; GRANTHA
6710 0x11349, // 11349..1134A; UNKNOWN
6711 0x1134B, // 1134B..1134D; GRANTHA
6712 0x1134E, // 1134E..1134F; UNKNOWN
6713 0x11350, // 11350 ; GRANTHA
6714 0x11351, // 11351..11356; UNKNOWN
6715 0x11357, // 11357 ; GRANTHA
6716 0x11358, // 11358..1135C; UNKNOWN
6717 0x1135D, // 1135D..11363; GRANTHA
6718 0x11364, // 11364..11365; UNKNOWN
6719 0x11366, // 11366..1136C; GRANTHA
6720 0x1136D, // 1136D..1136F; UNKNOWN
6721 0x11370, // 11370..11374; GRANTHA
6722 0x11375, // 11375..1137F; UNKNOWN
6723 0x11380, // 11380..11389; TULU_TIGALARI
6724 0x1138A, // 1138A ; UNKNOWN
6725 0x1138B, // 1138B ; TULU_TIGALARI
6726 0x1138C, // 1138C..1138D; UNKNOWN
6727 0x1138E, // 1138E ; TULU_TIGALARI
6728 0x1138F, // 1138F ; UNKNOWN
6729 0x11390, // 11390..113B5; TULU_TIGALARI
6730 0x113B6, // 113B6 ; UNKNOWN
6731 0x113B7, // 113B7..113C0; TULU_TIGALARI
6732 0x113C1, // 113C1 ; UNKNOWN
6733 0x113C2, // 113C2 ; TULU_TIGALARI
6734 0x113C3, // 113C3..113C4; UNKNOWN
6735 0x113C5, // 113C5 ; TULU_TIGALARI
6736 0x113C6, // 113C6 ; UNKNOWN
6737 0x113C7, // 113C7..113CA; TULU_TIGALARI
6738 0x113CB, // 113CB ; UNKNOWN
6739 0x113CC, // 113CC..113D5; TULU_TIGALARI
6740 0x113D6, // 113D6 ; UNKNOWN
6741 0x113D7, // 113D7..113D8; TULU_TIGALARI
6742 0x113D9, // 113D9..113E0; UNKNOWN
6743 0x113E1, // 113E1..113E2; TULU_TIGALARI
6744 0x113E3, // 113E3..113FF; UNKNOWN
6745 0x11400, // 11400..1145B; NEWA
6746 0x1145C, // 1145C ; UNKNOWN
6747 0x1145D, // 1145D..11461; NEWA
6748 0x11462, // 11462..1147F; UNKNOWN
6749 0x11480, // 11480..114C7; TIRHUTA
6750 0x114C8, // 114C8..114CF; UNKNOWN
6751 0x114D0, // 114D0..114D9; TIRHUTA
6752 0x114DA, // 114DA..1157F; UNKNOWN
6753 0x11580, // 11580..115B5; SIDDHAM
6754 0x115B6, // 115B6..115B7; UNKNOWN
6755 0x115B8, // 115B8..115DD; SIDDHAM
6756 0x115DE, // 115DE..115FF; UNKNOWN
6757 0x11600, // 11600..11644; MODI
6758 0x11645, // 11645..1164F; UNKNOWN
6759 0x11650, // 11650..11659; MODI
6760 0x1165A, // 1165A..1165F; UNKNOWN
6761 0x11660, // 11660..1166C; MONGOLIAN
6762 0x1166D, // 1166D..1167F; UNKNOWN
6763 0x11680, // 11680..116B9; TAKRI
6764 0x116BA, // 116BA..116BF; UNKNOWN
6765 0x116C0, // 116C0..116C9; TAKRI
6766 0x116CA, // 116CA..116CF; UNKNOWN
6767 0x116D0, // 116D0..116E3; MYANMAR
6768 0x116E4, // 116E4..116FF; UNKNOWN
6769 0x11700, // 11700..1171A; AHOM
6770 0x1171B, // 1171B..1171C; UNKNOWN
6771 0x1171D, // 1171D..1172B; AHOM
6772 0x1172C, // 1172C..1172F; UNKNOWN
6773 0x11730, // 11730..11746; AHOM
6774 0x11747, // 11747..117FF; UNKNOWN
6775 0x11800, // 11800..1183B; DOGRA
6776 0x1183C, // 1183C..1189F; UNKNOWN
6777 0x118A0, // 118A0..118F2; WARANG_CITI
6778 0x118F3, // 118F3..118FE; UNKNOWN
6779 0x118FF, // 118FF ; WARANG_CITI
6780 0x11900, // 11900..11906; DIVES_AKURU
6781 0x11907, // 11907..11908; UNKNOWN
6782 0x11909, // 11909 ; DIVES_AKURU
6783 0x1190A, // 1190A..1190B; UNKNOWN
6784 0x1190C, // 1190C..11913; DIVES_AKURU
6785 0x11914, // 11914 ; UNKNOWN
6786 0x11915, // 11915..11916; DIVES_AKURU
6787 0x11917, // 11917 ; UNKNOWN
6788 0x11918, // 11918..11935; DIVES_AKURU
6789 0x11936, // 11936 ; UNKNOWN
6790 0x11937, // 11937..11938; DIVES_AKURU
6791 0x11939, // 11939..1193A; UNKNOWN
6792 0x1193B, // 1193B..11946; DIVES_AKURU
6793 0x11947, // 11947..1194F; UNKNOWN
6794 0x11950, // 11950..11959; DIVES_AKURU
6795 0x1195A, // 1195A..1199F; UNKNOWN
6796 0x119A0, // 119A0..119A7; NANDINAGARI
6797 0x119A8, // 119A8..119A9; UNKNOWN
6798 0x119AA, // 119AA..119D7; NANDINAGARI
6799 0x119D8, // 119D8..119D9; UNKNOWN
6800 0x119DA, // 119DA..119E4; NANDINAGARI
6801 0x119E5, // 119E5..119FF; UNKNOWN
6802 0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE
6803 0x11A48, // 11A48..11A4F; UNKNOWN
6804 0x11A50, // 11A50..11AA2; SOYOMBO
6805 0x11AA3, // 11AA3..11AAF; UNKNOWN
6806 0x11AB0, // 11AB0..11ABF; CANADIAN_ABORIGINAL
6807 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU
6808 0x11AF9, // 11AF9..11AFF; UNKNOWN
6809 0x11B00, // 11B00..11B09; DEVANAGARI
6810 0x11B0A, // 11B0A..11BBF; UNKNOWN
6811 0x11BC0, // 11BC0..11BE1; SUNUWAR
6812 0x11BE2, // 11BE2..11BEF; UNKNOWN
6813 0x11BF0, // 11BF0..11BF9; SUNUWAR
6814 0x11BFA, // 11BFA..11BFF; UNKNOWN
6815 0x11C00, // 11C00..11C08; BHAIKSUKI
6816 0x11C09, // 11C09 ; UNKNOWN
6817 0x11C0A, // 11C0A..11C36; BHAIKSUKI
6818 0x11C37, // 11C37 ; UNKNOWN
6819 0x11C38, // 11C38..11C45; BHAIKSUKI
6820 0x11C46, // 11C46..11C4F; UNKNOWN
6821 0x11C50, // 11C50..11C6C; BHAIKSUKI
6822 0x11C6D, // 11C6D..11C6F; UNKNOWN
6823 0x11C70, // 11C70..11C8F; MARCHEN
6824 0x11C90, // 11C90..11C91; UNKNOWN
6825 0x11C92, // 11C92..11CA7; MARCHEN
6826 0x11CA8, // 11CA8 ; UNKNOWN
6827 0x11CA9, // 11CA9..11CB6; MARCHEN
6828 0x11CB7, // 11CB7..11CFF; UNKNOWN
6829 0x11D00, // 11D00..11D06; MASARAM_GONDI
6830 0x11D07, // 11D07 ; UNKNOWN
6831 0x11D08, // 11D08..11D09; MASARAM_GONDI
6832 0x11D0A, // 11D0A ; UNKNOWN
6833 0x11D0B, // 11D0B..11D36; MASARAM_GONDI
6834 0x11D37, // 11D37..11D39; UNKNOWN
6835 0x11D3A, // 11D3A ; MASARAM_GONDI
6836 0x11D3B, // 11D3B ; UNKNOWN
6837 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI
6838 0x11D3E, // 11D3E ; UNKNOWN
6839 0x11D3F, // 11D3F..11D47; MASARAM_GONDI
6840 0x11D48, // 11D48..11D4F; UNKNOWN
6841 0x11D50, // 11D50..11D59; MASARAM_GONDI
6842 0x11D5A, // 11D5A..11D5F; UNKNOWN
6843 0x11D60, // 11D60..11D65; GUNJALA_GONDI
6844 0x11D66, // 11D66 ; UNKNOWN
6845 0x11D67, // 11D67..11D68; GUNJALA_GONDI
6846 0x11D69, // 11D69 ; UNKNOWN
6847 0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI
6848 0x11D8F, // 11D8F ; UNKNOWN
6849 0x11D90, // 11D90..11D91; GUNJALA_GONDI
6850 0x11D92, // 11D92 ; UNKNOWN
6851 0x11D93, // 11D93..11D98; GUNJALA_GONDI
6852 0x11D99, // 11D99..11D9F; UNKNOWN
6853 0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI
6854 0x11DAA, // 11DAA..11EDF; UNKNOWN
6855 0x11EE0, // 11EE0..11EF8; MAKASAR
6856 0x11EF9, // 11EF9..11EFF; UNKNOWN
6857 0x11F00, // 11F00..11F10; KAWI
6858 0x11F11, // 11F11 ; UNKNOWN
6859 0x11F12, // 11F12..11F3A; KAWI
6860 0x11F3B, // 11F3B..11F3D; UNKNOWN
6861 0x11F3E, // 11F3E..11F5A; KAWI
6862 0x11F5B, // 11F5B..11FAF; UNKNOWN
6863 0x11FB0, // 11FB0 ; LISU
6864 0x11FB1, // 11FB1..11FBF; UNKNOWN
6865 0x11FC0, // 11FC0..11FF1; TAMIL
6866 0x11FF2, // 11FF2..11FFE; UNKNOWN
6867 0x11FFF, // 11FFF ; TAMIL
6868 0x12000, // 12000..12399; CUNEIFORM
6869 0x1239A, // 1239A..123FF; UNKNOWN
6870 0x12400, // 12400..1246E; CUNEIFORM
6871 0x1246F, // 1246F ; UNKNOWN
6872 0x12470, // 12470..12474; CUNEIFORM
6873 0x12475, // 12475..1247F; UNKNOWN
6874 0x12480, // 12480..12543; CUNEIFORM
6875 0x12544, // 12544..12F8F; UNKNOWN
6876 0x12F90, // 12F90..12FF2; CYPRO_MINOAN
6877 0x12FF3, // 12FF3..12FFF; UNKNOWN
6878 0x13000, // 13000..13455; EGYPTIAN_HIEROGLYPHS
6879 0x13456, // 13456..1345F; UNKNOWN
6880 0x13460, // 13460..143FA; EGYPTIAN_HIEROGLYPHS
6881 0x143FB, // 143FB..143FF; UNKNOWN
6882 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS
6883 0x14647, // 14647..160FF; UNKNOWN
6884 0x16100, // 16100..16139; GURUNG_KHEMA
6885 0x1613A, // 1613A..167FF; UNKNOWN
6886 0x16800, // 16800..16A38; BAMUM
6887 0x16A39, // 16A39..16A3F; UNKNOWN
6888 0x16A40, // 16A40..16A5E; MRO
6889 0x16A5F, // 16A5F ; UNKNOWN
6890 0x16A60, // 16A60..16A69; MRO
6891 0x16A6A, // 16A6A..16A6D; UNKNOWN
6892 0x16A6E, // 16A6E..16A6F; MRO
6893 0x16A70, // 16A70..16ABE; TANGSA
6894 0x16ABF, // 16ABF ; UNKNOWN
6895 0x16AC0, // 16AC0..16AC9; TANGSA
6896 0x16ACA, // 16ACA..16ACF; UNKNOWN
6897 0x16AD0, // 16AD0..16AED; BASSA_VAH
6898 0x16AEE, // 16AEE..16AEF; UNKNOWN
6899 0x16AF0, // 16AF0..16AF5; BASSA_VAH
6900 0x16AF6, // 16AF6..16AFF; UNKNOWN
6901 0x16B00, // 16B00..16B45; PAHAWH_HMONG
6902 0x16B46, // 16B46..16B4F; UNKNOWN
6903 0x16B50, // 16B50..16B59; PAHAWH_HMONG
6904 0x16B5A, // 16B5A ; UNKNOWN
6905 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG
6906 0x16B62, // 16B62 ; UNKNOWN
6907 0x16B63, // 16B63..16B77; PAHAWH_HMONG
6908 0x16B78, // 16B78..16B7C; UNKNOWN
6909 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG
6910 0x16B90, // 16B90..16D3F; UNKNOWN
6911 0x16D40, // 16D40..16D79; KIRAT_RAI
6912 0x16D7A, // 16D7A..16E3F; UNKNOWN
6913 0x16E40, // 16E40..16E9A; MEDEFAIDRIN
6914 0x16E9B, // 16E9B..16EFF; UNKNOWN
6915 0x16F00, // 16F00..16F4A; MIAO
6916 0x16F4B, // 16F4B..16F4E; UNKNOWN
6917 0x16F4F, // 16F4F..16F87; MIAO
6918 0x16F88, // 16F88..16F8E; UNKNOWN
6919 0x16F8F, // 16F8F..16F9F; MIAO
6920 0x16FA0, // 16FA0..16FDF; UNKNOWN
6921 0x16FE0, // 16FE0 ; TANGUT
6922 0x16FE1, // 16FE1 ; NUSHU
6923 0x16FE2, // 16FE2..16FE3; HAN
6924 0x16FE4, // 16FE4 ; KHITAN_SMALL_SCRIPT
6925 0x16FE5, // 16FE5..16FEF; UNKNOWN
6926 0x16FF0, // 16FF0..16FF1; HAN
6927 0x16FF2, // 16FF2..16FFF; UNKNOWN
6928 0x17000, // 17000..187F7; TANGUT
6929 0x187F8, // 187F8..187FF; UNKNOWN
6930 0x18800, // 18800..18AFF; TANGUT
6931 0x18B00, // 18B00..18CD5; KHITAN_SMALL_SCRIPT
6932 0x18CD6, // 18CD6..18CFE; UNKNOWN
6933 0x18CFF, // 18CFF ; KHITAN_SMALL_SCRIPT
6934 0x18D00, // 18D00..18D08; TANGUT
6935 0x18D09, // 18D09..1AFEF; UNKNOWN
6936 0x1AFF0, // 1AFF0..1AFF3; KATAKANA
6937 0x1AFF4, // 1AFF4 ; UNKNOWN
6938 0x1AFF5, // 1AFF5..1AFFB; KATAKANA
6939 0x1AFFC, // 1AFFC ; UNKNOWN
6940 0x1AFFD, // 1AFFD..1AFFE; KATAKANA
6941 0x1AFFF, // 1AFFF ; UNKNOWN
6942 0x1B000, // 1B000 ; KATAKANA
6943 0x1B001, // 1B001..1B11F; HIRAGANA
6944 0x1B120, // 1B120..1B122; KATAKANA
6945 0x1B123, // 1B123..1B131; UNKNOWN
6946 0x1B132, // 1B132 ; HIRAGANA
6947 0x1B133, // 1B133..1B14F; UNKNOWN
6948 0x1B150, // 1B150..1B152; HIRAGANA
6949 0x1B153, // 1B153..1B154; UNKNOWN
6950 0x1B155, // 1B155 ; KATAKANA
6951 0x1B156, // 1B156..1B163; UNKNOWN
6952 0x1B164, // 1B164..1B167; KATAKANA
6953 0x1B168, // 1B168..1B16F; UNKNOWN
6954 0x1B170, // 1B170..1B2FB; NUSHU
6955 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN
6956 0x1BC00, // 1BC00..1BC6A; DUPLOYAN
6957 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN
6958 0x1BC70, // 1BC70..1BC7C; DUPLOYAN
6959 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN
6960 0x1BC80, // 1BC80..1BC88; DUPLOYAN
6961 0x1BC89, // 1BC89..1BC8F; UNKNOWN
6962 0x1BC90, // 1BC90..1BC99; DUPLOYAN
6963 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN
6964 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN
6965 0x1BCA0, // 1BCA0..1BCA3; COMMON
6966 0x1BCA4, // 1BCA4..1CBFF; UNKNOWN
6967 0x1CC00, // 1CC00..1CCF9; COMMON
6968 0x1CCFA, // 1CCFA..1CCFF; UNKNOWN
6969 0x1CD00, // 1CD00..1CEB3; COMMON
6970 0x1CEB4, // 1CEB4..1CEFF; UNKNOWN
6971 0x1CF00, // 1CF00..1CF2D; INHERITED
6972 0x1CF2E, // 1CF2E..1CF2F; UNKNOWN
6973 0x1CF30, // 1CF30..1CF46; INHERITED
6974 0x1CF47, // 1CF47..1CF4F; UNKNOWN
6975 0x1CF50, // 1CF50..1CFC3; COMMON
6976 0x1CFC4, // 1CFC4..1CFFF; UNKNOWN
6977 0x1D000, // 1D000..1D0F5; COMMON
6978 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN
6979 0x1D100, // 1D100..1D126; COMMON
6980 0x1D127, // 1D127..1D128; UNKNOWN
6981 0x1D129, // 1D129..1D166; COMMON
6982 0x1D167, // 1D167..1D169; INHERITED
6983 0x1D16A, // 1D16A..1D17A; COMMON
6984 0x1D17B, // 1D17B..1D182; INHERITED
6985 0x1D183, // 1D183..1D184; COMMON
6986 0x1D185, // 1D185..1D18B; INHERITED
6987 0x1D18C, // 1D18C..1D1A9; COMMON
6988 0x1D1AA, // 1D1AA..1D1AD; INHERITED
6989 0x1D1AE, // 1D1AE..1D1EA; COMMON
6990 0x1D1EB, // 1D1EB..1D1FF; UNKNOWN
6991 0x1D200, // 1D200..1D245; GREEK
6992 0x1D246, // 1D246..1D2BF; UNKNOWN
6993 0x1D2C0, // 1D2C0..1D2D3; COMMON
6994 0x1D2D4, // 1D2D4..1D2DF; UNKNOWN
6995 0x1D2E0, // 1D2E0..1D2F3; COMMON
6996 0x1D2F4, // 1D2F4..1D2FF; UNKNOWN
6997 0x1D300, // 1D300..1D356; COMMON
6998 0x1D357, // 1D357..1D35F; UNKNOWN
6999 0x1D360, // 1D360..1D378; COMMON
7000 0x1D379, // 1D379..1D3FF; UNKNOWN
7001 0x1D400, // 1D400..1D454; COMMON
7002 0x1D455, // 1D455 ; UNKNOWN
7003 0x1D456, // 1D456..1D49C; COMMON
7004 0x1D49D, // 1D49D ; UNKNOWN
7005 0x1D49E, // 1D49E..1D49F; COMMON
7006 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN
7007 0x1D4A2, // 1D4A2 ; COMMON
7008 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN
7009 0x1D4A5, // 1D4A5..1D4A6; COMMON
7010 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN
7011 0x1D4A9, // 1D4A9..1D4AC; COMMON
7012 0x1D4AD, // 1D4AD ; UNKNOWN
7013 0x1D4AE, // 1D4AE..1D4B9; COMMON
7014 0x1D4BA, // 1D4BA ; UNKNOWN
7015 0x1D4BB, // 1D4BB ; COMMON
7016 0x1D4BC, // 1D4BC ; UNKNOWN
7017 0x1D4BD, // 1D4BD..1D4C3; COMMON
7018 0x1D4C4, // 1D4C4 ; UNKNOWN
7019 0x1D4C5, // 1D4C5..1D505; COMMON
7020 0x1D506, // 1D506 ; UNKNOWN
7021 0x1D507, // 1D507..1D50A; COMMON
7022 0x1D50B, // 1D50B..1D50C; UNKNOWN
7023 0x1D50D, // 1D50D..1D514; COMMON
7024 0x1D515, // 1D515 ; UNKNOWN
7025 0x1D516, // 1D516..1D51C; COMMON
7026 0x1D51D, // 1D51D ; UNKNOWN
7027 0x1D51E, // 1D51E..1D539; COMMON
7028 0x1D53A, // 1D53A ; UNKNOWN
7029 0x1D53B, // 1D53B..1D53E; COMMON
7030 0x1D53F, // 1D53F ; UNKNOWN
7031 0x1D540, // 1D540..1D544; COMMON
7032 0x1D545, // 1D545 ; UNKNOWN
7033 0x1D546, // 1D546 ; COMMON
7034 0x1D547, // 1D547..1D549; UNKNOWN
7035 0x1D54A, // 1D54A..1D550; COMMON
7036 0x1D551, // 1D551 ; UNKNOWN
7037 0x1D552, // 1D552..1D6A5; COMMON
7038 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN
7039 0x1D6A8, // 1D6A8..1D7CB; COMMON
7040 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN
7041 0x1D7CE, // 1D7CE..1D7FF; COMMON
7042 0x1D800, // 1D800..1DA8B; SIGNWRITING
7043 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN
7044 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING
7045 0x1DAA0, // 1DAA0 ; UNKNOWN
7046 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING
7047 0x1DAB0, // 1DAB0..1DEFF; UNKNOWN
7048 0x1DF00, // 1DF00..1DF1E; LATIN
7049 0x1DF1F, // 1DF1F..1DF24; UNKNOWN
7050 0x1DF25, // 1DF25..1DF2A; LATIN
7051 0x1DF2B, // 1DF2B..1DFFF; UNKNOWN
7052 0x1E000, // 1E000..1E006; GLAGOLITIC
7053 0x1E007, // 1E007 ; UNKNOWN
7054 0x1E008, // 1E008..1E018; GLAGOLITIC
7055 0x1E019, // 1E019..1E01A; UNKNOWN
7056 0x1E01B, // 1E01B..1E021; GLAGOLITIC
7057 0x1E022, // 1E022 ; UNKNOWN
7058 0x1E023, // 1E023..1E024; GLAGOLITIC
7059 0x1E025, // 1E025 ; UNKNOWN
7060 0x1E026, // 1E026..1E02A; GLAGOLITIC
7061 0x1E02B, // 1E02B..1E02F; UNKNOWN
7062 0x1E030, // 1E030..1E06D; CYRILLIC
7063 0x1E06E, // 1E06E..1E08E; UNKNOWN
7064 0x1E08F, // 1E08F ; CYRILLIC
7065 0x1E090, // 1E090..1E0FF; UNKNOWN
7066 0x1E100, // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG
7067 0x1E12D, // 1E12D..1E12F; UNKNOWN
7068 0x1E130, // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG
7069 0x1E13E, // 1E13E..1E13F; UNKNOWN
7070 0x1E140, // 1E140..1E149; NYIAKENG_PUACHUE_HMONG
7071 0x1E14A, // 1E14A..1E14D; UNKNOWN
7072 0x1E14E, // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG
7073 0x1E150, // 1E150..1E28F; UNKNOWN
7074 0x1E290, // 1E290..1E2AE; TOTO
7075 0x1E2AF, // 1E2AF..1E2BF; UNKNOWN
7076 0x1E2C0, // 1E2C0..1E2F9; WANCHO
7077 0x1E2FA, // 1E2FA..1E2FE; UNKNOWN
7078 0x1E2FF, // 1E2FF ; WANCHO
7079 0x1E300, // 1E300..1E4CF; UNKNOWN
7080 0x1E4D0, // 1E4D0..1E4F9; NAG_MUNDARI
7081 0x1E4FA, // 1E4FA..1E5CF; UNKNOWN
7082 0x1E5D0, // 1E5D0..1E5FA; OL_ONAL
7083 0x1E5FB, // 1E5FB..1E5FE; UNKNOWN
7084 0x1E5FF, // 1E5FF ; OL_ONAL
7085 0x1E600, // 1E600..1E7DF; UNKNOWN
7086 0x1E7E0, // 1E7E0..1E7E6; ETHIOPIC
7087 0x1E7E7, // 1E7E7 ; UNKNOWN
7088 0x1E7E8, // 1E7E8..1E7EB; ETHIOPIC
7089 0x1E7EC, // 1E7EC ; UNKNOWN
7090 0x1E7ED, // 1E7ED..1E7EE; ETHIOPIC
7091 0x1E7EF, // 1E7EF ; UNKNOWN
7092 0x1E7F0, // 1E7F0..1E7FE; ETHIOPIC
7093 0x1E7FF, // 1E7FF ; UNKNOWN
7094 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI
7095 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN
7096 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI
7097 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN
7098 0x1E900, // 1E900..1E94B; ADLAM
7099 0x1E94C, // 1E94C..1E94F; UNKNOWN
7100 0x1E950, // 1E950..1E959; ADLAM
7101 0x1E95A, // 1E95A..1E95D; UNKNOWN
7102 0x1E95E, // 1E95E..1E95F; ADLAM
7103 0x1E960, // 1E960..1EC70; UNKNOWN
7104 0x1EC71, // 1EC71..1ECB4; COMMON
7105 0x1ECB5, // 1ECB5..1ED00; UNKNOWN
7106 0x1ED01, // 1ED01..1ED3D; COMMON
7107 0x1ED3E, // 1ED3E..1EDFF; UNKNOWN
7108 0x1EE00, // 1EE00..1EE03; ARABIC
7109 0x1EE04, // 1EE04 ; UNKNOWN
7110 0x1EE05, // 1EE05..1EE1F; ARABIC
7111 0x1EE20, // 1EE20 ; UNKNOWN
7112 0x1EE21, // 1EE21..1EE22; ARABIC
7113 0x1EE23, // 1EE23 ; UNKNOWN
7114 0x1EE24, // 1EE24 ; ARABIC
7115 0x1EE25, // 1EE25..1EE26; UNKNOWN
7116 0x1EE27, // 1EE27 ; ARABIC
7117 0x1EE28, // 1EE28 ; UNKNOWN
7118 0x1EE29, // 1EE29..1EE32; ARABIC
7119 0x1EE33, // 1EE33 ; UNKNOWN
7120 0x1EE34, // 1EE34..1EE37; ARABIC
7121 0x1EE38, // 1EE38 ; UNKNOWN
7122 0x1EE39, // 1EE39 ; ARABIC
7123 0x1EE3A, // 1EE3A ; UNKNOWN
7124 0x1EE3B, // 1EE3B ; ARABIC
7125 0x1EE3C, // 1EE3C..1EE41; UNKNOWN
7126 0x1EE42, // 1EE42 ; ARABIC
7127 0x1EE43, // 1EE43..1EE46; UNKNOWN
7128 0x1EE47, // 1EE47 ; ARABIC
7129 0x1EE48, // 1EE48 ; UNKNOWN
7130 0x1EE49, // 1EE49 ; ARABIC
7131 0x1EE4A, // 1EE4A ; UNKNOWN
7132 0x1EE4B, // 1EE4B ; ARABIC
7133 0x1EE4C, // 1EE4C ; UNKNOWN
7134 0x1EE4D, // 1EE4D..1EE4F; ARABIC
7135 0x1EE50, // 1EE50 ; UNKNOWN
7136 0x1EE51, // 1EE51..1EE52; ARABIC
7137 0x1EE53, // 1EE53 ; UNKNOWN
7138 0x1EE54, // 1EE54 ; ARABIC
7139 0x1EE55, // 1EE55..1EE56; UNKNOWN
7140 0x1EE57, // 1EE57 ; ARABIC
7141 0x1EE58, // 1EE58 ; UNKNOWN
7142 0x1EE59, // 1EE59 ; ARABIC
7143 0x1EE5A, // 1EE5A ; UNKNOWN
7144 0x1EE5B, // 1EE5B ; ARABIC
7145 0x1EE5C, // 1EE5C ; UNKNOWN
7146 0x1EE5D, // 1EE5D ; ARABIC
7147 0x1EE5E, // 1EE5E ; UNKNOWN
7148 0x1EE5F, // 1EE5F ; ARABIC
7149 0x1EE60, // 1EE60 ; UNKNOWN
7150 0x1EE61, // 1EE61..1EE62; ARABIC
7151 0x1EE63, // 1EE63 ; UNKNOWN
7152 0x1EE64, // 1EE64 ; ARABIC
7153 0x1EE65, // 1EE65..1EE66; UNKNOWN
7154 0x1EE67, // 1EE67..1EE6A; ARABIC
7155 0x1EE6B, // 1EE6B ; UNKNOWN
7156 0x1EE6C, // 1EE6C..1EE72; ARABIC
7157 0x1EE73, // 1EE73 ; UNKNOWN
7158 0x1EE74, // 1EE74..1EE77; ARABIC
7159 0x1EE78, // 1EE78 ; UNKNOWN
7160 0x1EE79, // 1EE79..1EE7C; ARABIC
7161 0x1EE7D, // 1EE7D ; UNKNOWN
7162 0x1EE7E, // 1EE7E ; ARABIC
7163 0x1EE7F, // 1EE7F ; UNKNOWN
7164 0x1EE80, // 1EE80..1EE89; ARABIC
7165 0x1EE8A, // 1EE8A ; UNKNOWN
7166 0x1EE8B, // 1EE8B..1EE9B; ARABIC
7167 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN
7168 0x1EEA1, // 1EEA1..1EEA3; ARABIC
7169 0x1EEA4, // 1EEA4 ; UNKNOWN
7170 0x1EEA5, // 1EEA5..1EEA9; ARABIC
7171 0x1EEAA, // 1EEAA ; UNKNOWN
7172 0x1EEAB, // 1EEAB..1EEBB; ARABIC
7173 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN
7174 0x1EEF0, // 1EEF0..1EEF1; ARABIC
7175 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN
7176 0x1F000, // 1F000..1F02B; COMMON
7177 0x1F02C, // 1F02C..1F02F; UNKNOWN
7178 0x1F030, // 1F030..1F093; COMMON
7179 0x1F094, // 1F094..1F09F; UNKNOWN
7180 0x1F0A0, // 1F0A0..1F0AE; COMMON
7181 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN
7182 0x1F0B1, // 1F0B1..1F0BF; COMMON
7183 0x1F0C0, // 1F0C0 ; UNKNOWN
7184 0x1F0C1, // 1F0C1..1F0CF; COMMON
7185 0x1F0D0, // 1F0D0 ; UNKNOWN
7186 0x1F0D1, // 1F0D1..1F0F5; COMMON
7187 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN
7188 0x1F100, // 1F100..1F1AD; COMMON
7189 0x1F1AE, // 1F1AE..1F1E5; UNKNOWN
7190 0x1F1E6, // 1F1E6..1F1FF; COMMON
7191 0x1F200, // 1F200 ; HIRAGANA
7192 0x1F201, // 1F201..1F202; COMMON
7193 0x1F203, // 1F203..1F20F; UNKNOWN
7194 0x1F210, // 1F210..1F23B; COMMON
7195 0x1F23C, // 1F23C..1F23F; UNKNOWN
7196 0x1F240, // 1F240..1F248; COMMON
7197 0x1F249, // 1F249..1F24F; UNKNOWN
7198 0x1F250, // 1F250..1F251; COMMON
7199 0x1F252, // 1F252..1F25F; UNKNOWN
7200 0x1F260, // 1F260..1F265; COMMON
7201 0x1F266, // 1F266..1F2FF; UNKNOWN
7202 0x1F300, // 1F300..1F6D7; COMMON
7203 0x1F6D8, // 1F6D8..1F6DB; UNKNOWN
7204 0x1F6DC, // 1F6DC..1F6EC; COMMON
7205 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN
7206 0x1F6F0, // 1F6F0..1F6FC; COMMON
7207 0x1F6FD, // 1F6FD..1F6FF; UNKNOWN
7208 0x1F700, // 1F700..1F776; COMMON
7209 0x1F777, // 1F777..1F77A; UNKNOWN
7210 0x1F77B, // 1F77B..1F7D9; COMMON
7211 0x1F7DA, // 1F7DA..1F7DF; UNKNOWN
7212 0x1F7E0, // 1F7E0..1F7EB; COMMON
7213 0x1F7EC, // 1F7EC..1F7EF; UNKNOWN
7214 0x1F7F0, // 1F7F0 ; COMMON
7215 0x1F7F1, // 1F7F1..1F7FF; UNKNOWN
7216 0x1F800, // 1F800..1F80B; COMMON
7217 0x1F80C, // 1F80C..1F80F; UNKNOWN
7218 0x1F810, // 1F810..1F847; COMMON
7219 0x1F848, // 1F848..1F84F; UNKNOWN
7220 0x1F850, // 1F850..1F859; COMMON
7221 0x1F85A, // 1F85A..1F85F; UNKNOWN
7222 0x1F860, // 1F860..1F887; COMMON
7223 0x1F888, // 1F888..1F88F; UNKNOWN
7224 0x1F890, // 1F890..1F8AD; COMMON
7225 0x1F8AE, // 1F8AE..1F8AF; UNKNOWN
7226 0x1F8B0, // 1F8B0..1F8BB; COMMON
7227 0x1F8BC, // 1F8BC..1F8BF; UNKNOWN
7228 0x1F8C0, // 1F8C0..1F8C1; COMMON
7229 0x1F8C2, // 1F8C2..1F8FF; UNKNOWN
7230 0x1F900, // 1F900..1FA53; COMMON
7231 0x1FA54, // 1FA54..1FA5F; UNKNOWN
7232 0x1FA60, // 1FA60..1FA6D; COMMON
7233 0x1FA6E, // 1FA6E..1FA6F; UNKNOWN
7234 0x1FA70, // 1FA70..1FA7C; COMMON
7235 0x1FA7D, // 1FA7D..1FA7F; UNKNOWN
7236 0x1FA80, // 1FA80..1FA89; COMMON
7237 0x1FA8A, // 1FA8A..1FA8E; UNKNOWN
7238 0x1FA8F, // 1FA8F..1FAC6; COMMON
7239 0x1FAC7, // 1FAC7..1FACD; UNKNOWN
7240 0x1FACE, // 1FACE..1FADC; COMMON
7241 0x1FADD, // 1FADD..1FADE; UNKNOWN
7242 0x1FADF, // 1FADF..1FAE9; COMMON
7243 0x1FAEA, // 1FAEA..1FAEF; UNKNOWN
7244 0x1FAF0, // 1FAF0..1FAF8; COMMON
7245 0x1FAF9, // 1FAF9..1FAFF; UNKNOWN
7246 0x1FB00, // 1FB00..1FB92; COMMON
7247 0x1FB93, // 1FB93 ; UNKNOWN
7248 0x1FB94, // 1FB94..1FBF9; COMMON
7249 0x1FBFA, // 1FBFA..1FFFF; UNKNOWN
7250 0x20000, // 20000..2A6DF; HAN
7251 0x2A6E0, // 2A6E0..2A6FF; UNKNOWN
7252 0x2A700, // 2A700..2B739; HAN
7253 0x2B73A, // 2B73A..2B73F; UNKNOWN
7254 0x2B740, // 2B740..2B81D; HAN
7255 0x2B81E, // 2B81E..2B81F; UNKNOWN
7256 0x2B820, // 2B820..2CEA1; HAN
7257 0x2CEA2, // 2CEA2..2CEAF; UNKNOWN
7258 0x2CEB0, // 2CEB0..2EBE0; HAN
7259 0x2EBE1, // 2EBE1..2EBEF; UNKNOWN
7260 0x2EBF0, // 2EBF0..2EE5D; HAN
7261 0x2EE5E, // 2EE5E..2F7FF; UNKNOWN
7262 0x2F800, // 2F800..2FA1D; HAN
7263 0x2FA1E, // 2FA1E..2FFFF; UNKNOWN
7264 0x30000, // 30000..3134A; HAN
7265 0x3134B, // 3134B..3134F; UNKNOWN
7266 0x31350, // 31350..323AF; HAN
7267 0x323B0, // 323B0..E0000; UNKNOWN
7268 0xE0001, // E0001 ; COMMON
7269 0xE0002, // E0002..E001F; UNKNOWN
7270 0xE0020, // E0020..E007F; COMMON
7271 0xE0080, // E0080..E00FF; UNKNOWN
7272 0xE0100, // E0100..E01EF; INHERITED
7273 0xE01F0, // E01F0..10FFFF; UNKNOWN
7274 };
7275
7276 private static final UnicodeScript[] scripts = {
7277 COMMON, // 0000..0040
7278 LATIN, // 0041..005A
7279 COMMON, // 005B..0060
7280 LATIN, // 0061..007A
7281 COMMON, // 007B..00A9
7282 LATIN, // 00AA
7283 COMMON, // 00AB..00B9
7284 LATIN, // 00BA
7285 COMMON, // 00BB..00BF
7286 LATIN, // 00C0..00D6
7287 COMMON, // 00D7
7288 LATIN, // 00D8..00F6
7289 COMMON, // 00F7
7290 LATIN, // 00F8..02B8
7291 COMMON, // 02B9..02DF
7292 LATIN, // 02E0..02E4
7293 COMMON, // 02E5..02E9
7294 BOPOMOFO, // 02EA..02EB
7295 COMMON, // 02EC..02FF
7296 INHERITED, // 0300..036F
7297 GREEK, // 0370..0373
7298 COMMON, // 0374
7299 GREEK, // 0375..0377
7300 UNKNOWN, // 0378..0379
7301 GREEK, // 037A..037D
7302 COMMON, // 037E
7303 GREEK, // 037F
7304 UNKNOWN, // 0380..0383
7305 GREEK, // 0384
7306 COMMON, // 0385
7307 GREEK, // 0386
7308 COMMON, // 0387
7309 GREEK, // 0388..038A
7310 UNKNOWN, // 038B
7311 GREEK, // 038C
7312 UNKNOWN, // 038D
7313 GREEK, // 038E..03A1
7314 UNKNOWN, // 03A2
7315 GREEK, // 03A3..03E1
7316 COPTIC, // 03E2..03EF
7317 GREEK, // 03F0..03FF
7318 CYRILLIC, // 0400..0484
7319 INHERITED, // 0485..0486
7320 CYRILLIC, // 0487..052F
7321 UNKNOWN, // 0530
7322 ARMENIAN, // 0531..0556
7323 UNKNOWN, // 0557..0558
7324 ARMENIAN, // 0559..058A
7325 UNKNOWN, // 058B..058C
7326 ARMENIAN, // 058D..058F
7327 UNKNOWN, // 0590
7328 HEBREW, // 0591..05C7
7329 UNKNOWN, // 05C8..05CF
7330 HEBREW, // 05D0..05EA
7331 UNKNOWN, // 05EB..05EE
7332 HEBREW, // 05EF..05F4
7333 UNKNOWN, // 05F5..05FF
7334 ARABIC, // 0600..0604
7335 COMMON, // 0605
7336 ARABIC, // 0606..060B
7337 COMMON, // 060C
7338 ARABIC, // 060D..061A
7339 COMMON, // 061B
7340 ARABIC, // 061C..061E
7341 COMMON, // 061F
7342 ARABIC, // 0620..063F
7343 COMMON, // 0640
7344 ARABIC, // 0641..064A
7345 INHERITED, // 064B..0655
7346 ARABIC, // 0656..066F
7347 INHERITED, // 0670
7348 ARABIC, // 0671..06DC
7349 COMMON, // 06DD
7350 ARABIC, // 06DE..06FF
7351 SYRIAC, // 0700..070D
7352 UNKNOWN, // 070E
7353 SYRIAC, // 070F..074A
7354 UNKNOWN, // 074B..074C
7355 SYRIAC, // 074D..074F
7356 ARABIC, // 0750..077F
7357 THAANA, // 0780..07B1
7358 UNKNOWN, // 07B2..07BF
7359 NKO, // 07C0..07FA
7360 UNKNOWN, // 07FB..07FC
7361 NKO, // 07FD..07FF
7362 SAMARITAN, // 0800..082D
7363 UNKNOWN, // 082E..082F
7364 SAMARITAN, // 0830..083E
7365 UNKNOWN, // 083F
7366 MANDAIC, // 0840..085B
7367 UNKNOWN, // 085C..085D
7368 MANDAIC, // 085E
7369 UNKNOWN, // 085F
7370 SYRIAC, // 0860..086A
7371 UNKNOWN, // 086B..086F
7372 ARABIC, // 0870..088E
7373 UNKNOWN, // 088F
7374 ARABIC, // 0890..0891
7375 UNKNOWN, // 0892..0896
7376 ARABIC, // 0897..08E1
7377 COMMON, // 08E2
7378 ARABIC, // 08E3..08FF
7379 DEVANAGARI, // 0900..0950
7380 INHERITED, // 0951..0954
7381 DEVANAGARI, // 0955..0963
7382 COMMON, // 0964..0965
7383 DEVANAGARI, // 0966..097F
7384 BENGALI, // 0980..0983
7385 UNKNOWN, // 0984
7386 BENGALI, // 0985..098C
7387 UNKNOWN, // 098D..098E
7388 BENGALI, // 098F..0990
7389 UNKNOWN, // 0991..0992
7390 BENGALI, // 0993..09A8
7391 UNKNOWN, // 09A9
7392 BENGALI, // 09AA..09B0
7393 UNKNOWN, // 09B1
7394 BENGALI, // 09B2
7395 UNKNOWN, // 09B3..09B5
7396 BENGALI, // 09B6..09B9
7397 UNKNOWN, // 09BA..09BB
7398 BENGALI, // 09BC..09C4
7399 UNKNOWN, // 09C5..09C6
7400 BENGALI, // 09C7..09C8
7401 UNKNOWN, // 09C9..09CA
7402 BENGALI, // 09CB..09CE
7403 UNKNOWN, // 09CF..09D6
7404 BENGALI, // 09D7
7405 UNKNOWN, // 09D8..09DB
7406 BENGALI, // 09DC..09DD
7407 UNKNOWN, // 09DE
7408 BENGALI, // 09DF..09E3
7409 UNKNOWN, // 09E4..09E5
7410 BENGALI, // 09E6..09FE
7411 UNKNOWN, // 09FF..0A00
7412 GURMUKHI, // 0A01..0A03
7413 UNKNOWN, // 0A04
7414 GURMUKHI, // 0A05..0A0A
7415 UNKNOWN, // 0A0B..0A0E
7416 GURMUKHI, // 0A0F..0A10
7417 UNKNOWN, // 0A11..0A12
7418 GURMUKHI, // 0A13..0A28
7419 UNKNOWN, // 0A29
7420 GURMUKHI, // 0A2A..0A30
7421 UNKNOWN, // 0A31
7422 GURMUKHI, // 0A32..0A33
7423 UNKNOWN, // 0A34
7424 GURMUKHI, // 0A35..0A36
7425 UNKNOWN, // 0A37
7426 GURMUKHI, // 0A38..0A39
7427 UNKNOWN, // 0A3A..0A3B
7428 GURMUKHI, // 0A3C
7429 UNKNOWN, // 0A3D
7430 GURMUKHI, // 0A3E..0A42
7431 UNKNOWN, // 0A43..0A46
7432 GURMUKHI, // 0A47..0A48
7433 UNKNOWN, // 0A49..0A4A
7434 GURMUKHI, // 0A4B..0A4D
7435 UNKNOWN, // 0A4E..0A50
7436 GURMUKHI, // 0A51
7437 UNKNOWN, // 0A52..0A58
7438 GURMUKHI, // 0A59..0A5C
7439 UNKNOWN, // 0A5D
7440 GURMUKHI, // 0A5E
7441 UNKNOWN, // 0A5F..0A65
7442 GURMUKHI, // 0A66..0A76
7443 UNKNOWN, // 0A77..0A80
7444 GUJARATI, // 0A81..0A83
7445 UNKNOWN, // 0A84
7446 GUJARATI, // 0A85..0A8D
7447 UNKNOWN, // 0A8E
7448 GUJARATI, // 0A8F..0A91
7449 UNKNOWN, // 0A92
7450 GUJARATI, // 0A93..0AA8
7451 UNKNOWN, // 0AA9
7452 GUJARATI, // 0AAA..0AB0
7453 UNKNOWN, // 0AB1
7454 GUJARATI, // 0AB2..0AB3
7455 UNKNOWN, // 0AB4
7456 GUJARATI, // 0AB5..0AB9
7457 UNKNOWN, // 0ABA..0ABB
7458 GUJARATI, // 0ABC..0AC5
7459 UNKNOWN, // 0AC6
7460 GUJARATI, // 0AC7..0AC9
7461 UNKNOWN, // 0ACA
7462 GUJARATI, // 0ACB..0ACD
7463 UNKNOWN, // 0ACE..0ACF
7464 GUJARATI, // 0AD0
7465 UNKNOWN, // 0AD1..0ADF
7466 GUJARATI, // 0AE0..0AE3
7467 UNKNOWN, // 0AE4..0AE5
7468 GUJARATI, // 0AE6..0AF1
7469 UNKNOWN, // 0AF2..0AF8
7470 GUJARATI, // 0AF9..0AFF
7471 UNKNOWN, // 0B00
7472 ORIYA, // 0B01..0B03
7473 UNKNOWN, // 0B04
7474 ORIYA, // 0B05..0B0C
7475 UNKNOWN, // 0B0D..0B0E
7476 ORIYA, // 0B0F..0B10
7477 UNKNOWN, // 0B11..0B12
7478 ORIYA, // 0B13..0B28
7479 UNKNOWN, // 0B29
7480 ORIYA, // 0B2A..0B30
7481 UNKNOWN, // 0B31
7482 ORIYA, // 0B32..0B33
7483 UNKNOWN, // 0B34
7484 ORIYA, // 0B35..0B39
7485 UNKNOWN, // 0B3A..0B3B
7486 ORIYA, // 0B3C..0B44
7487 UNKNOWN, // 0B45..0B46
7488 ORIYA, // 0B47..0B48
7489 UNKNOWN, // 0B49..0B4A
7490 ORIYA, // 0B4B..0B4D
7491 UNKNOWN, // 0B4E..0B54
7492 ORIYA, // 0B55..0B57
7493 UNKNOWN, // 0B58..0B5B
7494 ORIYA, // 0B5C..0B5D
7495 UNKNOWN, // 0B5E
7496 ORIYA, // 0B5F..0B63
7497 UNKNOWN, // 0B64..0B65
7498 ORIYA, // 0B66..0B77
7499 UNKNOWN, // 0B78..0B81
7500 TAMIL, // 0B82..0B83
7501 UNKNOWN, // 0B84
7502 TAMIL, // 0B85..0B8A
7503 UNKNOWN, // 0B8B..0B8D
7504 TAMIL, // 0B8E..0B90
7505 UNKNOWN, // 0B91
7506 TAMIL, // 0B92..0B95
7507 UNKNOWN, // 0B96..0B98
7508 TAMIL, // 0B99..0B9A
7509 UNKNOWN, // 0B9B
7510 TAMIL, // 0B9C
7511 UNKNOWN, // 0B9D
7512 TAMIL, // 0B9E..0B9F
7513 UNKNOWN, // 0BA0..0BA2
7514 TAMIL, // 0BA3..0BA4
7515 UNKNOWN, // 0BA5..0BA7
7516 TAMIL, // 0BA8..0BAA
7517 UNKNOWN, // 0BAB..0BAD
7518 TAMIL, // 0BAE..0BB9
7519 UNKNOWN, // 0BBA..0BBD
7520 TAMIL, // 0BBE..0BC2
7521 UNKNOWN, // 0BC3..0BC5
7522 TAMIL, // 0BC6..0BC8
7523 UNKNOWN, // 0BC9
7524 TAMIL, // 0BCA..0BCD
7525 UNKNOWN, // 0BCE..0BCF
7526 TAMIL, // 0BD0
7527 UNKNOWN, // 0BD1..0BD6
7528 TAMIL, // 0BD7
7529 UNKNOWN, // 0BD8..0BE5
7530 TAMIL, // 0BE6..0BFA
7531 UNKNOWN, // 0BFB..0BFF
7532 TELUGU, // 0C00..0C0C
7533 UNKNOWN, // 0C0D
7534 TELUGU, // 0C0E..0C10
7535 UNKNOWN, // 0C11
7536 TELUGU, // 0C12..0C28
7537 UNKNOWN, // 0C29
7538 TELUGU, // 0C2A..0C39
7539 UNKNOWN, // 0C3A..0C3B
7540 TELUGU, // 0C3C..0C44
7541 UNKNOWN, // 0C45
7542 TELUGU, // 0C46..0C48
7543 UNKNOWN, // 0C49
7544 TELUGU, // 0C4A..0C4D
7545 UNKNOWN, // 0C4E..0C54
7546 TELUGU, // 0C55..0C56
7547 UNKNOWN, // 0C57
7548 TELUGU, // 0C58..0C5A
7549 UNKNOWN, // 0C5B..0C5C
7550 TELUGU, // 0C5D
7551 UNKNOWN, // 0C5E..0C5F
7552 TELUGU, // 0C60..0C63
7553 UNKNOWN, // 0C64..0C65
7554 TELUGU, // 0C66..0C6F
7555 UNKNOWN, // 0C70..0C76
7556 TELUGU, // 0C77..0C7F
7557 KANNADA, // 0C80..0C8C
7558 UNKNOWN, // 0C8D
7559 KANNADA, // 0C8E..0C90
7560 UNKNOWN, // 0C91
7561 KANNADA, // 0C92..0CA8
7562 UNKNOWN, // 0CA9
7563 KANNADA, // 0CAA..0CB3
7564 UNKNOWN, // 0CB4
7565 KANNADA, // 0CB5..0CB9
7566 UNKNOWN, // 0CBA..0CBB
7567 KANNADA, // 0CBC..0CC4
7568 UNKNOWN, // 0CC5
7569 KANNADA, // 0CC6..0CC8
7570 UNKNOWN, // 0CC9
7571 KANNADA, // 0CCA..0CCD
7572 UNKNOWN, // 0CCE..0CD4
7573 KANNADA, // 0CD5..0CD6
7574 UNKNOWN, // 0CD7..0CDC
7575 KANNADA, // 0CDD..0CDE
7576 UNKNOWN, // 0CDF
7577 KANNADA, // 0CE0..0CE3
7578 UNKNOWN, // 0CE4..0CE5
7579 KANNADA, // 0CE6..0CEF
7580 UNKNOWN, // 0CF0
7581 KANNADA, // 0CF1..0CF3
7582 UNKNOWN, // 0CF4..0CFF
7583 MALAYALAM, // 0D00..0D0C
7584 UNKNOWN, // 0D0D
7585 MALAYALAM, // 0D0E..0D10
7586 UNKNOWN, // 0D11
7587 MALAYALAM, // 0D12..0D44
7588 UNKNOWN, // 0D45
7589 MALAYALAM, // 0D46..0D48
7590 UNKNOWN, // 0D49
7591 MALAYALAM, // 0D4A..0D4F
7592 UNKNOWN, // 0D50..0D53
7593 MALAYALAM, // 0D54..0D63
7594 UNKNOWN, // 0D64..0D65
7595 MALAYALAM, // 0D66..0D7F
7596 UNKNOWN, // 0D80
7597 SINHALA, // 0D81..0D83
7598 UNKNOWN, // 0D84
7599 SINHALA, // 0D85..0D96
7600 UNKNOWN, // 0D97..0D99
7601 SINHALA, // 0D9A..0DB1
7602 UNKNOWN, // 0DB2
7603 SINHALA, // 0DB3..0DBB
7604 UNKNOWN, // 0DBC
7605 SINHALA, // 0DBD
7606 UNKNOWN, // 0DBE..0DBF
7607 SINHALA, // 0DC0..0DC6
7608 UNKNOWN, // 0DC7..0DC9
7609 SINHALA, // 0DCA
7610 UNKNOWN, // 0DCB..0DCE
7611 SINHALA, // 0DCF..0DD4
7612 UNKNOWN, // 0DD5
7613 SINHALA, // 0DD6
7614 UNKNOWN, // 0DD7
7615 SINHALA, // 0DD8..0DDF
7616 UNKNOWN, // 0DE0..0DE5
7617 SINHALA, // 0DE6..0DEF
7618 UNKNOWN, // 0DF0..0DF1
7619 SINHALA, // 0DF2..0DF4
7620 UNKNOWN, // 0DF5..0E00
7621 THAI, // 0E01..0E3A
7622 UNKNOWN, // 0E3B..0E3E
7623 COMMON, // 0E3F
7624 THAI, // 0E40..0E5B
7625 UNKNOWN, // 0E5C..0E80
7626 LAO, // 0E81..0E82
7627 UNKNOWN, // 0E83
7628 LAO, // 0E84
7629 UNKNOWN, // 0E85
7630 LAO, // 0E86..0E8A
7631 UNKNOWN, // 0E8B
7632 LAO, // 0E8C..0EA3
7633 UNKNOWN, // 0EA4
7634 LAO, // 0EA5
7635 UNKNOWN, // 0EA6
7636 LAO, // 0EA7..0EBD
7637 UNKNOWN, // 0EBE..0EBF
7638 LAO, // 0EC0..0EC4
7639 UNKNOWN, // 0EC5
7640 LAO, // 0EC6
7641 UNKNOWN, // 0EC7
7642 LAO, // 0EC8..0ECE
7643 UNKNOWN, // 0ECF
7644 LAO, // 0ED0..0ED9
7645 UNKNOWN, // 0EDA..0EDB
7646 LAO, // 0EDC..0EDF
7647 UNKNOWN, // 0EE0..0EFF
7648 TIBETAN, // 0F00..0F47
7649 UNKNOWN, // 0F48
7650 TIBETAN, // 0F49..0F6C
7651 UNKNOWN, // 0F6D..0F70
7652 TIBETAN, // 0F71..0F97
7653 UNKNOWN, // 0F98
7654 TIBETAN, // 0F99..0FBC
7655 UNKNOWN, // 0FBD
7656 TIBETAN, // 0FBE..0FCC
7657 UNKNOWN, // 0FCD
7658 TIBETAN, // 0FCE..0FD4
7659 COMMON, // 0FD5..0FD8
7660 TIBETAN, // 0FD9..0FDA
7661 UNKNOWN, // 0FDB..0FFF
7662 MYANMAR, // 1000..109F
7663 GEORGIAN, // 10A0..10C5
7664 UNKNOWN, // 10C6
7665 GEORGIAN, // 10C7
7666 UNKNOWN, // 10C8..10CC
7667 GEORGIAN, // 10CD
7668 UNKNOWN, // 10CE..10CF
7669 GEORGIAN, // 10D0..10FA
7670 COMMON, // 10FB
7671 GEORGIAN, // 10FC..10FF
7672 HANGUL, // 1100..11FF
7673 ETHIOPIC, // 1200..1248
7674 UNKNOWN, // 1249
7675 ETHIOPIC, // 124A..124D
7676 UNKNOWN, // 124E..124F
7677 ETHIOPIC, // 1250..1256
7678 UNKNOWN, // 1257
7679 ETHIOPIC, // 1258
7680 UNKNOWN, // 1259
7681 ETHIOPIC, // 125A..125D
7682 UNKNOWN, // 125E..125F
7683 ETHIOPIC, // 1260..1288
7684 UNKNOWN, // 1289
7685 ETHIOPIC, // 128A..128D
7686 UNKNOWN, // 128E..128F
7687 ETHIOPIC, // 1290..12B0
7688 UNKNOWN, // 12B1
7689 ETHIOPIC, // 12B2..12B5
7690 UNKNOWN, // 12B6..12B7
7691 ETHIOPIC, // 12B8..12BE
7692 UNKNOWN, // 12BF
7693 ETHIOPIC, // 12C0
7694 UNKNOWN, // 12C1
7695 ETHIOPIC, // 12C2..12C5
7696 UNKNOWN, // 12C6..12C7
7697 ETHIOPIC, // 12C8..12D6
7698 UNKNOWN, // 12D7
7699 ETHIOPIC, // 12D8..1310
7700 UNKNOWN, // 1311
7701 ETHIOPIC, // 1312..1315
7702 UNKNOWN, // 1316..1317
7703 ETHIOPIC, // 1318..135A
7704 UNKNOWN, // 135B..135C
7705 ETHIOPIC, // 135D..137C
7706 UNKNOWN, // 137D..137F
7707 ETHIOPIC, // 1380..1399
7708 UNKNOWN, // 139A..139F
7709 CHEROKEE, // 13A0..13F5
7710 UNKNOWN, // 13F6..13F7
7711 CHEROKEE, // 13F8..13FD
7712 UNKNOWN, // 13FE..13FF
7713 CANADIAN_ABORIGINAL, // 1400..167F
7714 OGHAM, // 1680..169C
7715 UNKNOWN, // 169D..169F
7716 RUNIC, // 16A0..16EA
7717 COMMON, // 16EB..16ED
7718 RUNIC, // 16EE..16F8
7719 UNKNOWN, // 16F9..16FF
7720 TAGALOG, // 1700..1715
7721 UNKNOWN, // 1716..171E
7722 TAGALOG, // 171F
7723 HANUNOO, // 1720..1734
7724 COMMON, // 1735..1736
7725 UNKNOWN, // 1737..173F
7726 BUHID, // 1740..1753
7727 UNKNOWN, // 1754..175F
7728 TAGBANWA, // 1760..176C
7729 UNKNOWN, // 176D
7730 TAGBANWA, // 176E..1770
7731 UNKNOWN, // 1771
7732 TAGBANWA, // 1772..1773
7733 UNKNOWN, // 1774..177F
7734 KHMER, // 1780..17DD
7735 UNKNOWN, // 17DE..17DF
7736 KHMER, // 17E0..17E9
7737 UNKNOWN, // 17EA..17EF
7738 KHMER, // 17F0..17F9
7739 UNKNOWN, // 17FA..17FF
7740 MONGOLIAN, // 1800..1801
7741 COMMON, // 1802..1803
7742 MONGOLIAN, // 1804
7743 COMMON, // 1805
7744 MONGOLIAN, // 1806..1819
7745 UNKNOWN, // 181A..181F
7746 MONGOLIAN, // 1820..1878
7747 UNKNOWN, // 1879..187F
7748 MONGOLIAN, // 1880..18AA
7749 UNKNOWN, // 18AB..18AF
7750 CANADIAN_ABORIGINAL, // 18B0..18F5
7751 UNKNOWN, // 18F6..18FF
7752 LIMBU, // 1900..191E
7753 UNKNOWN, // 191F
7754 LIMBU, // 1920..192B
7755 UNKNOWN, // 192C..192F
7756 LIMBU, // 1930..193B
7757 UNKNOWN, // 193C..193F
7758 LIMBU, // 1940
7759 UNKNOWN, // 1941..1943
7760 LIMBU, // 1944..194F
7761 TAI_LE, // 1950..196D
7762 UNKNOWN, // 196E..196F
7763 TAI_LE, // 1970..1974
7764 UNKNOWN, // 1975..197F
7765 NEW_TAI_LUE, // 1980..19AB
7766 UNKNOWN, // 19AC..19AF
7767 NEW_TAI_LUE, // 19B0..19C9
7768 UNKNOWN, // 19CA..19CF
7769 NEW_TAI_LUE, // 19D0..19DA
7770 UNKNOWN, // 19DB..19DD
7771 NEW_TAI_LUE, // 19DE..19DF
7772 KHMER, // 19E0..19FF
7773 BUGINESE, // 1A00..1A1B
7774 UNKNOWN, // 1A1C..1A1D
7775 BUGINESE, // 1A1E..1A1F
7776 TAI_THAM, // 1A20..1A5E
7777 UNKNOWN, // 1A5F
7778 TAI_THAM, // 1A60..1A7C
7779 UNKNOWN, // 1A7D..1A7E
7780 TAI_THAM, // 1A7F..1A89
7781 UNKNOWN, // 1A8A..1A8F
7782 TAI_THAM, // 1A90..1A99
7783 UNKNOWN, // 1A9A..1A9F
7784 TAI_THAM, // 1AA0..1AAD
7785 UNKNOWN, // 1AAE..1AAF
7786 INHERITED, // 1AB0..1ACE
7787 UNKNOWN, // 1ACF..1AFF
7788 BALINESE, // 1B00..1B4C
7789 UNKNOWN, // 1B4D
7790 BALINESE, // 1B4E..1B7F
7791 SUNDANESE, // 1B80..1BBF
7792 BATAK, // 1BC0..1BF3
7793 UNKNOWN, // 1BF4..1BFB
7794 BATAK, // 1BFC..1BFF
7795 LEPCHA, // 1C00..1C37
7796 UNKNOWN, // 1C38..1C3A
7797 LEPCHA, // 1C3B..1C49
7798 UNKNOWN, // 1C4A..1C4C
7799 LEPCHA, // 1C4D..1C4F
7800 OL_CHIKI, // 1C50..1C7F
7801 CYRILLIC, // 1C80..1C8A
7802 UNKNOWN, // 1C8B..1C8F
7803 GEORGIAN, // 1C90..1CBA
7804 UNKNOWN, // 1CBB..1CBC
7805 GEORGIAN, // 1CBD..1CBF
7806 SUNDANESE, // 1CC0..1CC7
7807 UNKNOWN, // 1CC8..1CCF
7808 INHERITED, // 1CD0..1CD2
7809 COMMON, // 1CD3
7810 INHERITED, // 1CD4..1CE0
7811 COMMON, // 1CE1
7812 INHERITED, // 1CE2..1CE8
7813 COMMON, // 1CE9..1CEC
7814 INHERITED, // 1CED
7815 COMMON, // 1CEE..1CF3
7816 INHERITED, // 1CF4
7817 COMMON, // 1CF5..1CF7
7818 INHERITED, // 1CF8..1CF9
7819 COMMON, // 1CFA
7820 UNKNOWN, // 1CFB..1CFF
7821 LATIN, // 1D00..1D25
7822 GREEK, // 1D26..1D2A
7823 CYRILLIC, // 1D2B
7824 LATIN, // 1D2C..1D5C
7825 GREEK, // 1D5D..1D61
7826 LATIN, // 1D62..1D65
7827 GREEK, // 1D66..1D6A
7828 LATIN, // 1D6B..1D77
7829 CYRILLIC, // 1D78
7830 LATIN, // 1D79..1DBE
7831 GREEK, // 1DBF
7832 INHERITED, // 1DC0..1DFF
7833 LATIN, // 1E00..1EFF
7834 GREEK, // 1F00..1F15
7835 UNKNOWN, // 1F16..1F17
7836 GREEK, // 1F18..1F1D
7837 UNKNOWN, // 1F1E..1F1F
7838 GREEK, // 1F20..1F45
7839 UNKNOWN, // 1F46..1F47
7840 GREEK, // 1F48..1F4D
7841 UNKNOWN, // 1F4E..1F4F
7842 GREEK, // 1F50..1F57
7843 UNKNOWN, // 1F58
7844 GREEK, // 1F59
7845 UNKNOWN, // 1F5A
7846 GREEK, // 1F5B
7847 UNKNOWN, // 1F5C
7848 GREEK, // 1F5D
7849 UNKNOWN, // 1F5E
7850 GREEK, // 1F5F..1F7D
7851 UNKNOWN, // 1F7E..1F7F
7852 GREEK, // 1F80..1FB4
7853 UNKNOWN, // 1FB5
7854 GREEK, // 1FB6..1FC4
7855 UNKNOWN, // 1FC5
7856 GREEK, // 1FC6..1FD3
7857 UNKNOWN, // 1FD4..1FD5
7858 GREEK, // 1FD6..1FDB
7859 UNKNOWN, // 1FDC
7860 GREEK, // 1FDD..1FEF
7861 UNKNOWN, // 1FF0..1FF1
7862 GREEK, // 1FF2..1FF4
7863 UNKNOWN, // 1FF5
7864 GREEK, // 1FF6..1FFE
7865 UNKNOWN, // 1FFF
7866 COMMON, // 2000..200B
7867 INHERITED, // 200C..200D
7868 COMMON, // 200E..2064
7869 UNKNOWN, // 2065
7870 COMMON, // 2066..2070
7871 LATIN, // 2071
7872 UNKNOWN, // 2072..2073
7873 COMMON, // 2074..207E
7874 LATIN, // 207F
7875 COMMON, // 2080..208E
7876 UNKNOWN, // 208F
7877 LATIN, // 2090..209C
7878 UNKNOWN, // 209D..209F
7879 COMMON, // 20A0..20C0
7880 UNKNOWN, // 20C1..20CF
7881 INHERITED, // 20D0..20F0
7882 UNKNOWN, // 20F1..20FF
7883 COMMON, // 2100..2125
7884 GREEK, // 2126
7885 COMMON, // 2127..2129
7886 LATIN, // 212A..212B
7887 COMMON, // 212C..2131
7888 LATIN, // 2132
7889 COMMON, // 2133..214D
7890 LATIN, // 214E
7891 COMMON, // 214F..215F
7892 LATIN, // 2160..2188
7893 COMMON, // 2189..218B
7894 UNKNOWN, // 218C..218F
7895 COMMON, // 2190..2429
7896 UNKNOWN, // 242A..243F
7897 COMMON, // 2440..244A
7898 UNKNOWN, // 244B..245F
7899 COMMON, // 2460..27FF
7900 BRAILLE, // 2800..28FF
7901 COMMON, // 2900..2B73
7902 UNKNOWN, // 2B74..2B75
7903 COMMON, // 2B76..2B95
7904 UNKNOWN, // 2B96
7905 COMMON, // 2B97..2BFF
7906 GLAGOLITIC, // 2C00..2C5F
7907 LATIN, // 2C60..2C7F
7908 COPTIC, // 2C80..2CF3
7909 UNKNOWN, // 2CF4..2CF8
7910 COPTIC, // 2CF9..2CFF
7911 GEORGIAN, // 2D00..2D25
7912 UNKNOWN, // 2D26
7913 GEORGIAN, // 2D27
7914 UNKNOWN, // 2D28..2D2C
7915 GEORGIAN, // 2D2D
7916 UNKNOWN, // 2D2E..2D2F
7917 TIFINAGH, // 2D30..2D67
7918 UNKNOWN, // 2D68..2D6E
7919 TIFINAGH, // 2D6F..2D70
7920 UNKNOWN, // 2D71..2D7E
7921 TIFINAGH, // 2D7F
7922 ETHIOPIC, // 2D80..2D96
7923 UNKNOWN, // 2D97..2D9F
7924 ETHIOPIC, // 2DA0..2DA6
7925 UNKNOWN, // 2DA7
7926 ETHIOPIC, // 2DA8..2DAE
7927 UNKNOWN, // 2DAF
7928 ETHIOPIC, // 2DB0..2DB6
7929 UNKNOWN, // 2DB7
7930 ETHIOPIC, // 2DB8..2DBE
7931 UNKNOWN, // 2DBF
7932 ETHIOPIC, // 2DC0..2DC6
7933 UNKNOWN, // 2DC7
7934 ETHIOPIC, // 2DC8..2DCE
7935 UNKNOWN, // 2DCF
7936 ETHIOPIC, // 2DD0..2DD6
7937 UNKNOWN, // 2DD7
7938 ETHIOPIC, // 2DD8..2DDE
7939 UNKNOWN, // 2DDF
7940 CYRILLIC, // 2DE0..2DFF
7941 COMMON, // 2E00..2E5D
7942 UNKNOWN, // 2E5E..2E7F
7943 HAN, // 2E80..2E99
7944 UNKNOWN, // 2E9A
7945 HAN, // 2E9B..2EF3
7946 UNKNOWN, // 2EF4..2EFF
7947 HAN, // 2F00..2FD5
7948 UNKNOWN, // 2FD6..2FEF
7949 COMMON, // 2FF0..3004
7950 HAN, // 3005
7951 COMMON, // 3006
7952 HAN, // 3007
7953 COMMON, // 3008..3020
7954 HAN, // 3021..3029
7955 INHERITED, // 302A..302D
7956 HANGUL, // 302E..302F
7957 COMMON, // 3030..3037
7958 HAN, // 3038..303B
7959 COMMON, // 303C..303F
7960 UNKNOWN, // 3040
7961 HIRAGANA, // 3041..3096
7962 UNKNOWN, // 3097..3098
7963 INHERITED, // 3099..309A
7964 COMMON, // 309B..309C
7965 HIRAGANA, // 309D..309F
7966 COMMON, // 30A0
7967 KATAKANA, // 30A1..30FA
7968 COMMON, // 30FB..30FC
7969 KATAKANA, // 30FD..30FF
7970 UNKNOWN, // 3100..3104
7971 BOPOMOFO, // 3105..312F
7972 UNKNOWN, // 3130
7973 HANGUL, // 3131..318E
7974 UNKNOWN, // 318F
7975 COMMON, // 3190..319F
7976 BOPOMOFO, // 31A0..31BF
7977 COMMON, // 31C0..31E5
7978 UNKNOWN, // 31E6..31EE
7979 COMMON, // 31EF
7980 KATAKANA, // 31F0..31FF
7981 HANGUL, // 3200..321E
7982 UNKNOWN, // 321F
7983 COMMON, // 3220..325F
7984 HANGUL, // 3260..327E
7985 COMMON, // 327F..32CF
7986 KATAKANA, // 32D0..32FE
7987 COMMON, // 32FF
7988 KATAKANA, // 3300..3357
7989 COMMON, // 3358..33FF
7990 HAN, // 3400..4DBF
7991 COMMON, // 4DC0..4DFF
7992 HAN, // 4E00..9FFF
7993 YI, // A000..A48C
7994 UNKNOWN, // A48D..A48F
7995 YI, // A490..A4C6
7996 UNKNOWN, // A4C7..A4CF
7997 LISU, // A4D0..A4FF
7998 VAI, // A500..A62B
7999 UNKNOWN, // A62C..A63F
8000 CYRILLIC, // A640..A69F
8001 BAMUM, // A6A0..A6F7
8002 UNKNOWN, // A6F8..A6FF
8003 COMMON, // A700..A721
8004 LATIN, // A722..A787
8005 COMMON, // A788..A78A
8006 LATIN, // A78B..A7CD
8007 UNKNOWN, // A7CE..A7CF
8008 LATIN, // A7D0..A7D1
8009 UNKNOWN, // A7D2
8010 LATIN, // A7D3
8011 UNKNOWN, // A7D4
8012 LATIN, // A7D5..A7DC
8013 UNKNOWN, // A7DD..A7F1
8014 LATIN, // A7F2..A7FF
8015 SYLOTI_NAGRI, // A800..A82C
8016 UNKNOWN, // A82D..A82F
8017 COMMON, // A830..A839
8018 UNKNOWN, // A83A..A83F
8019 PHAGS_PA, // A840..A877
8020 UNKNOWN, // A878..A87F
8021 SAURASHTRA, // A880..A8C5
8022 UNKNOWN, // A8C6..A8CD
8023 SAURASHTRA, // A8CE..A8D9
8024 UNKNOWN, // A8DA..A8DF
8025 DEVANAGARI, // A8E0..A8FF
8026 KAYAH_LI, // A900..A92D
8027 COMMON, // A92E
8028 KAYAH_LI, // A92F
8029 REJANG, // A930..A953
8030 UNKNOWN, // A954..A95E
8031 REJANG, // A95F
8032 HANGUL, // A960..A97C
8033 UNKNOWN, // A97D..A97F
8034 JAVANESE, // A980..A9CD
8035 UNKNOWN, // A9CE
8036 COMMON, // A9CF
8037 JAVANESE, // A9D0..A9D9
8038 UNKNOWN, // A9DA..A9DD
8039 JAVANESE, // A9DE..A9DF
8040 MYANMAR, // A9E0..A9FE
8041 UNKNOWN, // A9FF
8042 CHAM, // AA00..AA36
8043 UNKNOWN, // AA37..AA3F
8044 CHAM, // AA40..AA4D
8045 UNKNOWN, // AA4E..AA4F
8046 CHAM, // AA50..AA59
8047 UNKNOWN, // AA5A..AA5B
8048 CHAM, // AA5C..AA5F
8049 MYANMAR, // AA60..AA7F
8050 TAI_VIET, // AA80..AAC2
8051 UNKNOWN, // AAC3..AADA
8052 TAI_VIET, // AADB..AADF
8053 MEETEI_MAYEK, // AAE0..AAF6
8054 UNKNOWN, // AAF7..AB00
8055 ETHIOPIC, // AB01..AB06
8056 UNKNOWN, // AB07..AB08
8057 ETHIOPIC, // AB09..AB0E
8058 UNKNOWN, // AB0F..AB10
8059 ETHIOPIC, // AB11..AB16
8060 UNKNOWN, // AB17..AB1F
8061 ETHIOPIC, // AB20..AB26
8062 UNKNOWN, // AB27
8063 ETHIOPIC, // AB28..AB2E
8064 UNKNOWN, // AB2F
8065 LATIN, // AB30..AB5A
8066 COMMON, // AB5B
8067 LATIN, // AB5C..AB64
8068 GREEK, // AB65
8069 LATIN, // AB66..AB69
8070 COMMON, // AB6A..AB6B
8071 UNKNOWN, // AB6C..AB6F
8072 CHEROKEE, // AB70..ABBF
8073 MEETEI_MAYEK, // ABC0..ABED
8074 UNKNOWN, // ABEE..ABEF
8075 MEETEI_MAYEK, // ABF0..ABF9
8076 UNKNOWN, // ABFA..ABFF
8077 HANGUL, // AC00..D7A3
8078 UNKNOWN, // D7A4..D7AF
8079 HANGUL, // D7B0..D7C6
8080 UNKNOWN, // D7C7..D7CA
8081 HANGUL, // D7CB..D7FB
8082 UNKNOWN, // D7FC..F8FF
8083 HAN, // F900..FA6D
8084 UNKNOWN, // FA6E..FA6F
8085 HAN, // FA70..FAD9
8086 UNKNOWN, // FADA..FAFF
8087 LATIN, // FB00..FB06
8088 UNKNOWN, // FB07..FB12
8089 ARMENIAN, // FB13..FB17
8090 UNKNOWN, // FB18..FB1C
8091 HEBREW, // FB1D..FB36
8092 UNKNOWN, // FB37
8093 HEBREW, // FB38..FB3C
8094 UNKNOWN, // FB3D
8095 HEBREW, // FB3E
8096 UNKNOWN, // FB3F
8097 HEBREW, // FB40..FB41
8098 UNKNOWN, // FB42
8099 HEBREW, // FB43..FB44
8100 UNKNOWN, // FB45
8101 HEBREW, // FB46..FB4F
8102 ARABIC, // FB50..FBC2
8103 UNKNOWN, // FBC3..FBD2
8104 ARABIC, // FBD3..FD3D
8105 COMMON, // FD3E..FD3F
8106 ARABIC, // FD40..FD8F
8107 UNKNOWN, // FD90..FD91
8108 ARABIC, // FD92..FDC7
8109 UNKNOWN, // FDC8..FDCE
8110 ARABIC, // FDCF
8111 UNKNOWN, // FDD0..FDEF
8112 ARABIC, // FDF0..FDFF
8113 INHERITED, // FE00..FE0F
8114 COMMON, // FE10..FE19
8115 UNKNOWN, // FE1A..FE1F
8116 INHERITED, // FE20..FE2D
8117 CYRILLIC, // FE2E..FE2F
8118 COMMON, // FE30..FE52
8119 UNKNOWN, // FE53
8120 COMMON, // FE54..FE66
8121 UNKNOWN, // FE67
8122 COMMON, // FE68..FE6B
8123 UNKNOWN, // FE6C..FE6F
8124 ARABIC, // FE70..FE74
8125 UNKNOWN, // FE75
8126 ARABIC, // FE76..FEFC
8127 UNKNOWN, // FEFD..FEFE
8128 COMMON, // FEFF
8129 UNKNOWN, // FF00
8130 COMMON, // FF01..FF20
8131 LATIN, // FF21..FF3A
8132 COMMON, // FF3B..FF40
8133 LATIN, // FF41..FF5A
8134 COMMON, // FF5B..FF65
8135 KATAKANA, // FF66..FF6F
8136 COMMON, // FF70
8137 KATAKANA, // FF71..FF9D
8138 COMMON, // FF9E..FF9F
8139 HANGUL, // FFA0..FFBE
8140 UNKNOWN, // FFBF..FFC1
8141 HANGUL, // FFC2..FFC7
8142 UNKNOWN, // FFC8..FFC9
8143 HANGUL, // FFCA..FFCF
8144 UNKNOWN, // FFD0..FFD1
8145 HANGUL, // FFD2..FFD7
8146 UNKNOWN, // FFD8..FFD9
8147 HANGUL, // FFDA..FFDC
8148 UNKNOWN, // FFDD..FFDF
8149 COMMON, // FFE0..FFE6
8150 UNKNOWN, // FFE7
8151 COMMON, // FFE8..FFEE
8152 UNKNOWN, // FFEF..FFF8
8153 COMMON, // FFF9..FFFD
8154 UNKNOWN, // FFFE..FFFF
8155 LINEAR_B, // 10000..1000B
8156 UNKNOWN, // 1000C
8157 LINEAR_B, // 1000D..10026
8158 UNKNOWN, // 10027
8159 LINEAR_B, // 10028..1003A
8160 UNKNOWN, // 1003B
8161 LINEAR_B, // 1003C..1003D
8162 UNKNOWN, // 1003E
8163 LINEAR_B, // 1003F..1004D
8164 UNKNOWN, // 1004E..1004F
8165 LINEAR_B, // 10050..1005D
8166 UNKNOWN, // 1005E..1007F
8167 LINEAR_B, // 10080..100FA
8168 UNKNOWN, // 100FB..100FF
8169 COMMON, // 10100..10102
8170 UNKNOWN, // 10103..10106
8171 COMMON, // 10107..10133
8172 UNKNOWN, // 10134..10136
8173 COMMON, // 10137..1013F
8174 GREEK, // 10140..1018E
8175 UNKNOWN, // 1018F
8176 COMMON, // 10190..1019C
8177 UNKNOWN, // 1019D..1019F
8178 GREEK, // 101A0
8179 UNKNOWN, // 101A1..101CF
8180 COMMON, // 101D0..101FC
8181 INHERITED, // 101FD
8182 UNKNOWN, // 101FE..1027F
8183 LYCIAN, // 10280..1029C
8184 UNKNOWN, // 1029D..1029F
8185 CARIAN, // 102A0..102D0
8186 UNKNOWN, // 102D1..102DF
8187 INHERITED, // 102E0
8188 COMMON, // 102E1..102FB
8189 UNKNOWN, // 102FC..102FF
8190 OLD_ITALIC, // 10300..10323
8191 UNKNOWN, // 10324..1032C
8192 OLD_ITALIC, // 1032D..1032F
8193 GOTHIC, // 10330..1034A
8194 UNKNOWN, // 1034B..1034F
8195 OLD_PERMIC, // 10350..1037A
8196 UNKNOWN, // 1037B..1037F
8197 UGARITIC, // 10380..1039D
8198 UNKNOWN, // 1039E
8199 UGARITIC, // 1039F
8200 OLD_PERSIAN, // 103A0..103C3
8201 UNKNOWN, // 103C4..103C7
8202 OLD_PERSIAN, // 103C8..103D5
8203 UNKNOWN, // 103D6..103FF
8204 DESERET, // 10400..1044F
8205 SHAVIAN, // 10450..1047F
8206 OSMANYA, // 10480..1049D
8207 UNKNOWN, // 1049E..1049F
8208 OSMANYA, // 104A0..104A9
8209 UNKNOWN, // 104AA..104AF
8210 OSAGE, // 104B0..104D3
8211 UNKNOWN, // 104D4..104D7
8212 OSAGE, // 104D8..104FB
8213 UNKNOWN, // 104FC..104FF
8214 ELBASAN, // 10500..10527
8215 UNKNOWN, // 10528..1052F
8216 CAUCASIAN_ALBANIAN, // 10530..10563
8217 UNKNOWN, // 10564..1056E
8218 CAUCASIAN_ALBANIAN, // 1056F
8219 VITHKUQI, // 10570..1057A
8220 UNKNOWN, // 1057B
8221 VITHKUQI, // 1057C..1058A
8222 UNKNOWN, // 1058B
8223 VITHKUQI, // 1058C..10592
8224 UNKNOWN, // 10593
8225 VITHKUQI, // 10594..10595
8226 UNKNOWN, // 10596
8227 VITHKUQI, // 10597..105A1
8228 UNKNOWN, // 105A2
8229 VITHKUQI, // 105A3..105B1
8230 UNKNOWN, // 105B2
8231 VITHKUQI, // 105B3..105B9
8232 UNKNOWN, // 105BA
8233 VITHKUQI, // 105BB..105BC
8234 UNKNOWN, // 105BD..105BF
8235 TODHRI, // 105C0..105F3
8236 UNKNOWN, // 105F4..105FF
8237 LINEAR_A, // 10600..10736
8238 UNKNOWN, // 10737..1073F
8239 LINEAR_A, // 10740..10755
8240 UNKNOWN, // 10756..1075F
8241 LINEAR_A, // 10760..10767
8242 UNKNOWN, // 10768..1077F
8243 LATIN, // 10780..10785
8244 UNKNOWN, // 10786
8245 LATIN, // 10787..107B0
8246 UNKNOWN, // 107B1
8247 LATIN, // 107B2..107BA
8248 UNKNOWN, // 107BB..107FF
8249 CYPRIOT, // 10800..10805
8250 UNKNOWN, // 10806..10807
8251 CYPRIOT, // 10808
8252 UNKNOWN, // 10809
8253 CYPRIOT, // 1080A..10835
8254 UNKNOWN, // 10836
8255 CYPRIOT, // 10837..10838
8256 UNKNOWN, // 10839..1083B
8257 CYPRIOT, // 1083C
8258 UNKNOWN, // 1083D..1083E
8259 CYPRIOT, // 1083F
8260 IMPERIAL_ARAMAIC, // 10840..10855
8261 UNKNOWN, // 10856
8262 IMPERIAL_ARAMAIC, // 10857..1085F
8263 PALMYRENE, // 10860..1087F
8264 NABATAEAN, // 10880..1089E
8265 UNKNOWN, // 1089F..108A6
8266 NABATAEAN, // 108A7..108AF
8267 UNKNOWN, // 108B0..108DF
8268 HATRAN, // 108E0..108F2
8269 UNKNOWN, // 108F3
8270 HATRAN, // 108F4..108F5
8271 UNKNOWN, // 108F6..108FA
8272 HATRAN, // 108FB..108FF
8273 PHOENICIAN, // 10900..1091B
8274 UNKNOWN, // 1091C..1091E
8275 PHOENICIAN, // 1091F
8276 LYDIAN, // 10920..10939
8277 UNKNOWN, // 1093A..1093E
8278 LYDIAN, // 1093F
8279 UNKNOWN, // 10940..1097F
8280 MEROITIC_HIEROGLYPHS, // 10980..1099F
8281 MEROITIC_CURSIVE, // 109A0..109B7
8282 UNKNOWN, // 109B8..109BB
8283 MEROITIC_CURSIVE, // 109BC..109CF
8284 UNKNOWN, // 109D0..109D1
8285 MEROITIC_CURSIVE, // 109D2..109FF
8286 KHAROSHTHI, // 10A00..10A03
8287 UNKNOWN, // 10A04
8288 KHAROSHTHI, // 10A05..10A06
8289 UNKNOWN, // 10A07..10A0B
8290 KHAROSHTHI, // 10A0C..10A13
8291 UNKNOWN, // 10A14
8292 KHAROSHTHI, // 10A15..10A17
8293 UNKNOWN, // 10A18
8294 KHAROSHTHI, // 10A19..10A35
8295 UNKNOWN, // 10A36..10A37
8296 KHAROSHTHI, // 10A38..10A3A
8297 UNKNOWN, // 10A3B..10A3E
8298 KHAROSHTHI, // 10A3F..10A48
8299 UNKNOWN, // 10A49..10A4F
8300 KHAROSHTHI, // 10A50..10A58
8301 UNKNOWN, // 10A59..10A5F
8302 OLD_SOUTH_ARABIAN, // 10A60..10A7F
8303 OLD_NORTH_ARABIAN, // 10A80..10A9F
8304 UNKNOWN, // 10AA0..10ABF
8305 MANICHAEAN, // 10AC0..10AE6
8306 UNKNOWN, // 10AE7..10AEA
8307 MANICHAEAN, // 10AEB..10AF6
8308 UNKNOWN, // 10AF7..10AFF
8309 AVESTAN, // 10B00..10B35
8310 UNKNOWN, // 10B36..10B38
8311 AVESTAN, // 10B39..10B3F
8312 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55
8313 UNKNOWN, // 10B56..10B57
8314 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F
8315 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72
8316 UNKNOWN, // 10B73..10B77
8317 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F
8318 PSALTER_PAHLAVI, // 10B80..10B91
8319 UNKNOWN, // 10B92..10B98
8320 PSALTER_PAHLAVI, // 10B99..10B9C
8321 UNKNOWN, // 10B9D..10BA8
8322 PSALTER_PAHLAVI, // 10BA9..10BAF
8323 UNKNOWN, // 10BB0..10BFF
8324 OLD_TURKIC, // 10C00..10C48
8325 UNKNOWN, // 10C49..10C7F
8326 OLD_HUNGARIAN, // 10C80..10CB2
8327 UNKNOWN, // 10CB3..10CBF
8328 OLD_HUNGARIAN, // 10CC0..10CF2
8329 UNKNOWN, // 10CF3..10CF9
8330 OLD_HUNGARIAN, // 10CFA..10CFF
8331 HANIFI_ROHINGYA, // 10D00..10D27
8332 UNKNOWN, // 10D28..10D2F
8333 HANIFI_ROHINGYA, // 10D30..10D39
8334 UNKNOWN, // 10D3A..10D3F
8335 GARAY, // 10D40..10D65
8336 UNKNOWN, // 10D66..10D68
8337 GARAY, // 10D69..10D85
8338 UNKNOWN, // 10D86..10D8D
8339 GARAY, // 10D8E..10D8F
8340 UNKNOWN, // 10D90..10E5F
8341 ARABIC, // 10E60..10E7E
8342 UNKNOWN, // 10E7F
8343 YEZIDI, // 10E80..10EA9
8344 UNKNOWN, // 10EAA
8345 YEZIDI, // 10EAB..10EAD
8346 UNKNOWN, // 10EAE..10EAF
8347 YEZIDI, // 10EB0..10EB1
8348 UNKNOWN, // 10EB2..10EC1
8349 ARABIC, // 10EC2..10EC4
8350 UNKNOWN, // 10EC5..10EFB
8351 ARABIC, // 10EFC..10EFF
8352 OLD_SOGDIAN, // 10F00..10F27
8353 UNKNOWN, // 10F28..10F2F
8354 SOGDIAN, // 10F30..10F59
8355 UNKNOWN, // 10F5A..10F6F
8356 OLD_UYGHUR, // 10F70..10F89
8357 UNKNOWN, // 10F8A..10FAF
8358 CHORASMIAN, // 10FB0..10FCB
8359 UNKNOWN, // 10FCC..10FDF
8360 ELYMAIC, // 10FE0..10FF6
8361 UNKNOWN, // 10FF7..10FFF
8362 BRAHMI, // 11000..1104D
8363 UNKNOWN, // 1104E..11051
8364 BRAHMI, // 11052..11075
8365 UNKNOWN, // 11076..1107E
8366 BRAHMI, // 1107F
8367 KAITHI, // 11080..110C2
8368 UNKNOWN, // 110C3..110CC
8369 KAITHI, // 110CD
8370 UNKNOWN, // 110CE..110CF
8371 SORA_SOMPENG, // 110D0..110E8
8372 UNKNOWN, // 110E9..110EF
8373 SORA_SOMPENG, // 110F0..110F9
8374 UNKNOWN, // 110FA..110FF
8375 CHAKMA, // 11100..11134
8376 UNKNOWN, // 11135
8377 CHAKMA, // 11136..11147
8378 UNKNOWN, // 11148..1114F
8379 MAHAJANI, // 11150..11176
8380 UNKNOWN, // 11177..1117F
8381 SHARADA, // 11180..111DF
8382 UNKNOWN, // 111E0
8383 SINHALA, // 111E1..111F4
8384 UNKNOWN, // 111F5..111FF
8385 KHOJKI, // 11200..11211
8386 UNKNOWN, // 11212
8387 KHOJKI, // 11213..11241
8388 UNKNOWN, // 11242..1127F
8389 MULTANI, // 11280..11286
8390 UNKNOWN, // 11287
8391 MULTANI, // 11288
8392 UNKNOWN, // 11289
8393 MULTANI, // 1128A..1128D
8394 UNKNOWN, // 1128E
8395 MULTANI, // 1128F..1129D
8396 UNKNOWN, // 1129E
8397 MULTANI, // 1129F..112A9
8398 UNKNOWN, // 112AA..112AF
8399 KHUDAWADI, // 112B0..112EA
8400 UNKNOWN, // 112EB..112EF
8401 KHUDAWADI, // 112F0..112F9
8402 UNKNOWN, // 112FA..112FF
8403 GRANTHA, // 11300..11303
8404 UNKNOWN, // 11304
8405 GRANTHA, // 11305..1130C
8406 UNKNOWN, // 1130D..1130E
8407 GRANTHA, // 1130F..11310
8408 UNKNOWN, // 11311..11312
8409 GRANTHA, // 11313..11328
8410 UNKNOWN, // 11329
8411 GRANTHA, // 1132A..11330
8412 UNKNOWN, // 11331
8413 GRANTHA, // 11332..11333
8414 UNKNOWN, // 11334
8415 GRANTHA, // 11335..11339
8416 UNKNOWN, // 1133A
8417 INHERITED, // 1133B
8418 GRANTHA, // 1133C..11344
8419 UNKNOWN, // 11345..11346
8420 GRANTHA, // 11347..11348
8421 UNKNOWN, // 11349..1134A
8422 GRANTHA, // 1134B..1134D
8423 UNKNOWN, // 1134E..1134F
8424 GRANTHA, // 11350
8425 UNKNOWN, // 11351..11356
8426 GRANTHA, // 11357
8427 UNKNOWN, // 11358..1135C
8428 GRANTHA, // 1135D..11363
8429 UNKNOWN, // 11364..11365
8430 GRANTHA, // 11366..1136C
8431 UNKNOWN, // 1136D..1136F
8432 GRANTHA, // 11370..11374
8433 UNKNOWN, // 11375..1137F
8434 TULU_TIGALARI, // 11380..11389
8435 UNKNOWN, // 1138A
8436 TULU_TIGALARI, // 1138B
8437 UNKNOWN, // 1138C..1138D
8438 TULU_TIGALARI, // 1138E
8439 UNKNOWN, // 1138F
8440 TULU_TIGALARI, // 11390..113B5
8441 UNKNOWN, // 113B6
8442 TULU_TIGALARI, // 113B7..113C0
8443 UNKNOWN, // 113C1
8444 TULU_TIGALARI, // 113C2
8445 UNKNOWN, // 113C3..113C4
8446 TULU_TIGALARI, // 113C5
8447 UNKNOWN, // 113C6
8448 TULU_TIGALARI, // 113C7..113CA
8449 UNKNOWN, // 113CB
8450 TULU_TIGALARI, // 113CC..113D5
8451 UNKNOWN, // 113D6
8452 TULU_TIGALARI, // 113D7..113D8
8453 UNKNOWN, // 113D9..113E0
8454 TULU_TIGALARI, // 113E1..113E2
8455 UNKNOWN, // 113E3..113FF
8456 NEWA, // 11400..1145B
8457 UNKNOWN, // 1145C
8458 NEWA, // 1145D..11461
8459 UNKNOWN, // 11462..1147F
8460 TIRHUTA, // 11480..114C7
8461 UNKNOWN, // 114C8..114CF
8462 TIRHUTA, // 114D0..114D9
8463 UNKNOWN, // 114DA..1157F
8464 SIDDHAM, // 11580..115B5
8465 UNKNOWN, // 115B6..115B7
8466 SIDDHAM, // 115B8..115DD
8467 UNKNOWN, // 115DE..115FF
8468 MODI, // 11600..11644
8469 UNKNOWN, // 11645..1164F
8470 MODI, // 11650..11659
8471 UNKNOWN, // 1165A..1165F
8472 MONGOLIAN, // 11660..1166C
8473 UNKNOWN, // 1166D..1167F
8474 TAKRI, // 11680..116B9
8475 UNKNOWN, // 116BA..116BF
8476 TAKRI, // 116C0..116C9
8477 UNKNOWN, // 116CA..116CF
8478 MYANMAR, // 116D0..116E3
8479 UNKNOWN, // 116E4..116FF
8480 AHOM, // 11700..1171A
8481 UNKNOWN, // 1171B..1171C
8482 AHOM, // 1171D..1172B
8483 UNKNOWN, // 1172C..1172F
8484 AHOM, // 11730..11746
8485 UNKNOWN, // 11747..117FF
8486 DOGRA, // 11800..1183B
8487 UNKNOWN, // 1183C..1189F
8488 WARANG_CITI, // 118A0..118F2
8489 UNKNOWN, // 118F3..118FE
8490 WARANG_CITI, // 118FF
8491 DIVES_AKURU, // 11900..11906
8492 UNKNOWN, // 11907..11908
8493 DIVES_AKURU, // 11909
8494 UNKNOWN, // 1190A..1190B
8495 DIVES_AKURU, // 1190C..11913
8496 UNKNOWN, // 11914
8497 DIVES_AKURU, // 11915..11916
8498 UNKNOWN, // 11917
8499 DIVES_AKURU, // 11918..11935
8500 UNKNOWN, // 11936
8501 DIVES_AKURU, // 11937..11938
8502 UNKNOWN, // 11939..1193A
8503 DIVES_AKURU, // 1193B..11946
8504 UNKNOWN, // 11947..1194F
8505 DIVES_AKURU, // 11950..11959
8506 UNKNOWN, // 1195A..1199F
8507 NANDINAGARI, // 119A0..119A7
8508 UNKNOWN, // 119A8..119A9
8509 NANDINAGARI, // 119AA..119D7
8510 UNKNOWN, // 119D8..119D9
8511 NANDINAGARI, // 119DA..119E4
8512 UNKNOWN, // 119E5..119FF
8513 ZANABAZAR_SQUARE, // 11A00..11A47
8514 UNKNOWN, // 11A48..11A4F
8515 SOYOMBO, // 11A50..11AA2
8516 UNKNOWN, // 11AA3..11AAF
8517 CANADIAN_ABORIGINAL, // 11AB0..11ABF
8518 PAU_CIN_HAU, // 11AC0..11AF8
8519 UNKNOWN, // 11AF9..11AFF
8520 DEVANAGARI, // 11B00..11B09
8521 UNKNOWN, // 11B0A..11BBF
8522 SUNUWAR, // 11BC0..11BE1
8523 UNKNOWN, // 11BE2..11BEF
8524 SUNUWAR, // 11BF0..11BF9
8525 UNKNOWN, // 11BFA..11BFF
8526 BHAIKSUKI, // 11C00..11C08
8527 UNKNOWN, // 11C09
8528 BHAIKSUKI, // 11C0A..11C36
8529 UNKNOWN, // 11C37
8530 BHAIKSUKI, // 11C38..11C45
8531 UNKNOWN, // 11C46..11C4F
8532 BHAIKSUKI, // 11C50..11C6C
8533 UNKNOWN, // 11C6D..11C6F
8534 MARCHEN, // 11C70..11C8F
8535 UNKNOWN, // 11C90..11C91
8536 MARCHEN, // 11C92..11CA7
8537 UNKNOWN, // 11CA8
8538 MARCHEN, // 11CA9..11CB6
8539 UNKNOWN, // 11CB7..11CFF
8540 MASARAM_GONDI, // 11D00..11D06
8541 UNKNOWN, // 11D07
8542 MASARAM_GONDI, // 11D08..11D09
8543 UNKNOWN, // 11D0A
8544 MASARAM_GONDI, // 11D0B..11D36
8545 UNKNOWN, // 11D37..11D39
8546 MASARAM_GONDI, // 11D3A
8547 UNKNOWN, // 11D3B
8548 MASARAM_GONDI, // 11D3C..11D3D
8549 UNKNOWN, // 11D3E
8550 MASARAM_GONDI, // 11D3F..11D47
8551 UNKNOWN, // 11D48..11D4F
8552 MASARAM_GONDI, // 11D50..11D59
8553 UNKNOWN, // 11D5A..11D5F
8554 GUNJALA_GONDI, // 11D60..11D65
8555 UNKNOWN, // 11D66
8556 GUNJALA_GONDI, // 11D67..11D68
8557 UNKNOWN, // 11D69
8558 GUNJALA_GONDI, // 11D6A..11D8E
8559 UNKNOWN, // 11D8F
8560 GUNJALA_GONDI, // 11D90..11D91
8561 UNKNOWN, // 11D92
8562 GUNJALA_GONDI, // 11D93..11D98
8563 UNKNOWN, // 11D99..11D9F
8564 GUNJALA_GONDI, // 11DA0..11DA9
8565 UNKNOWN, // 11DAA..11EDF
8566 MAKASAR, // 11EE0..11EF8
8567 UNKNOWN, // 11EF9..11EFF
8568 KAWI, // 11F00..11F10
8569 UNKNOWN, // 11F11
8570 KAWI, // 11F12..11F3A
8571 UNKNOWN, // 11F3B..11F3D
8572 KAWI, // 11F3E..11F5A
8573 UNKNOWN, // 11F5B..11FAF
8574 LISU, // 11FB0
8575 UNKNOWN, // 11FB1..11FBF
8576 TAMIL, // 11FC0..11FF1
8577 UNKNOWN, // 11FF2..11FFE
8578 TAMIL, // 11FFF
8579 CUNEIFORM, // 12000..12399
8580 UNKNOWN, // 1239A..123FF
8581 CUNEIFORM, // 12400..1246E
8582 UNKNOWN, // 1246F
8583 CUNEIFORM, // 12470..12474
8584 UNKNOWN, // 12475..1247F
8585 CUNEIFORM, // 12480..12543
8586 UNKNOWN, // 12544..12F8F
8587 CYPRO_MINOAN, // 12F90..12FF2
8588 UNKNOWN, // 12FF3..12FFF
8589 EGYPTIAN_HIEROGLYPHS, // 13000..13455
8590 UNKNOWN, // 13456..1345F
8591 EGYPTIAN_HIEROGLYPHS, // 13460..143FA
8592 UNKNOWN, // 143FB..143FF
8593 ANATOLIAN_HIEROGLYPHS, // 14400..14646
8594 UNKNOWN, // 14647..160FF
8595 GURUNG_KHEMA, // 16100..16139
8596 UNKNOWN, // 1613A..167FF
8597 BAMUM, // 16800..16A38
8598 UNKNOWN, // 16A39..16A3F
8599 MRO, // 16A40..16A5E
8600 UNKNOWN, // 16A5F
8601 MRO, // 16A60..16A69
8602 UNKNOWN, // 16A6A..16A6D
8603 MRO, // 16A6E..16A6F
8604 TANGSA, // 16A70..16ABE
8605 UNKNOWN, // 16ABF
8606 TANGSA, // 16AC0..16AC9
8607 UNKNOWN, // 16ACA..16ACF
8608 BASSA_VAH, // 16AD0..16AED
8609 UNKNOWN, // 16AEE..16AEF
8610 BASSA_VAH, // 16AF0..16AF5
8611 UNKNOWN, // 16AF6..16AFF
8612 PAHAWH_HMONG, // 16B00..16B45
8613 UNKNOWN, // 16B46..16B4F
8614 PAHAWH_HMONG, // 16B50..16B59
8615 UNKNOWN, // 16B5A
8616 PAHAWH_HMONG, // 16B5B..16B61
8617 UNKNOWN, // 16B62
8618 PAHAWH_HMONG, // 16B63..16B77
8619 UNKNOWN, // 16B78..16B7C
8620 PAHAWH_HMONG, // 16B7D..16B8F
8621 UNKNOWN, // 16B90..16D3F
8622 KIRAT_RAI, // 16D40..16D79
8623 UNKNOWN, // 16D7A..16E3F
8624 MEDEFAIDRIN, // 16E40..16E9A
8625 UNKNOWN, // 16E9B..16EFF
8626 MIAO, // 16F00..16F4A
8627 UNKNOWN, // 16F4B..16F4E
8628 MIAO, // 16F4F..16F87
8629 UNKNOWN, // 16F88..16F8E
8630 MIAO, // 16F8F..16F9F
8631 UNKNOWN, // 16FA0..16FDF
8632 TANGUT, // 16FE0
8633 NUSHU, // 16FE1
8634 HAN, // 16FE2..16FE3
8635 KHITAN_SMALL_SCRIPT, // 16FE4
8636 UNKNOWN, // 16FE5..16FEF
8637 HAN, // 16FF0..16FF1
8638 UNKNOWN, // 16FF2..16FFF
8639 TANGUT, // 17000..187F7
8640 UNKNOWN, // 187F8..187FF
8641 TANGUT, // 18800..18AFF
8642 KHITAN_SMALL_SCRIPT, // 18B00..18CD5
8643 UNKNOWN, // 18CD6..18CFE
8644 KHITAN_SMALL_SCRIPT, // 18CFF
8645 TANGUT, // 18D00..18D08
8646 UNKNOWN, // 18D09..1AFEF
8647 KATAKANA, // 1AFF0..1AFF3
8648 UNKNOWN, // 1AFF4
8649 KATAKANA, // 1AFF5..1AFFB
8650 UNKNOWN, // 1AFFC
8651 KATAKANA, // 1AFFD..1AFFE
8652 UNKNOWN, // 1AFFF
8653 KATAKANA, // 1B000
8654 HIRAGANA, // 1B001..1B11F
8655 KATAKANA, // 1B120..1B122
8656 UNKNOWN, // 1B123..1B131
8657 HIRAGANA, // 1B132
8658 UNKNOWN, // 1B133..1B14F
8659 HIRAGANA, // 1B150..1B152
8660 UNKNOWN, // 1B153..1B154
8661 KATAKANA, // 1B155
8662 UNKNOWN, // 1B156..1B163
8663 KATAKANA, // 1B164..1B167
8664 UNKNOWN, // 1B168..1B16F
8665 NUSHU, // 1B170..1B2FB
8666 UNKNOWN, // 1B2FC..1BBFF
8667 DUPLOYAN, // 1BC00..1BC6A
8668 UNKNOWN, // 1BC6B..1BC6F
8669 DUPLOYAN, // 1BC70..1BC7C
8670 UNKNOWN, // 1BC7D..1BC7F
8671 DUPLOYAN, // 1BC80..1BC88
8672 UNKNOWN, // 1BC89..1BC8F
8673 DUPLOYAN, // 1BC90..1BC99
8674 UNKNOWN, // 1BC9A..1BC9B
8675 DUPLOYAN, // 1BC9C..1BC9F
8676 COMMON, // 1BCA0..1BCA3
8677 UNKNOWN, // 1BCA4..1CBFF
8678 COMMON, // 1CC00..1CCF9
8679 UNKNOWN, // 1CCFA..1CCFF
8680 COMMON, // 1CD00..1CEB3
8681 UNKNOWN, // 1CEB4..1CEFF
8682 INHERITED, // 1CF00..1CF2D
8683 UNKNOWN, // 1CF2E..1CF2F
8684 INHERITED, // 1CF30..1CF46
8685 UNKNOWN, // 1CF47..1CF4F
8686 COMMON, // 1CF50..1CFC3
8687 UNKNOWN, // 1CFC4..1CFFF
8688 COMMON, // 1D000..1D0F5
8689 UNKNOWN, // 1D0F6..1D0FF
8690 COMMON, // 1D100..1D126
8691 UNKNOWN, // 1D127..1D128
8692 COMMON, // 1D129..1D166
8693 INHERITED, // 1D167..1D169
8694 COMMON, // 1D16A..1D17A
8695 INHERITED, // 1D17B..1D182
8696 COMMON, // 1D183..1D184
8697 INHERITED, // 1D185..1D18B
8698 COMMON, // 1D18C..1D1A9
8699 INHERITED, // 1D1AA..1D1AD
8700 COMMON, // 1D1AE..1D1EA
8701 UNKNOWN, // 1D1EB..1D1FF
8702 GREEK, // 1D200..1D245
8703 UNKNOWN, // 1D246..1D2BF
8704 COMMON, // 1D2C0..1D2D3
8705 UNKNOWN, // 1D2D4..1D2DF
8706 COMMON, // 1D2E0..1D2F3
8707 UNKNOWN, // 1D2F4..1D2FF
8708 COMMON, // 1D300..1D356
8709 UNKNOWN, // 1D357..1D35F
8710 COMMON, // 1D360..1D378
8711 UNKNOWN, // 1D379..1D3FF
8712 COMMON, // 1D400..1D454
8713 UNKNOWN, // 1D455
8714 COMMON, // 1D456..1D49C
8715 UNKNOWN, // 1D49D
8716 COMMON, // 1D49E..1D49F
8717 UNKNOWN, // 1D4A0..1D4A1
8718 COMMON, // 1D4A2
8719 UNKNOWN, // 1D4A3..1D4A4
8720 COMMON, // 1D4A5..1D4A6
8721 UNKNOWN, // 1D4A7..1D4A8
8722 COMMON, // 1D4A9..1D4AC
8723 UNKNOWN, // 1D4AD
8724 COMMON, // 1D4AE..1D4B9
8725 UNKNOWN, // 1D4BA
8726 COMMON, // 1D4BB
8727 UNKNOWN, // 1D4BC
8728 COMMON, // 1D4BD..1D4C3
8729 UNKNOWN, // 1D4C4
8730 COMMON, // 1D4C5..1D505
8731 UNKNOWN, // 1D506
8732 COMMON, // 1D507..1D50A
8733 UNKNOWN, // 1D50B..1D50C
8734 COMMON, // 1D50D..1D514
8735 UNKNOWN, // 1D515
8736 COMMON, // 1D516..1D51C
8737 UNKNOWN, // 1D51D
8738 COMMON, // 1D51E..1D539
8739 UNKNOWN, // 1D53A
8740 COMMON, // 1D53B..1D53E
8741 UNKNOWN, // 1D53F
8742 COMMON, // 1D540..1D544
8743 UNKNOWN, // 1D545
8744 COMMON, // 1D546
8745 UNKNOWN, // 1D547..1D549
8746 COMMON, // 1D54A..1D550
8747 UNKNOWN, // 1D551
8748 COMMON, // 1D552..1D6A5
8749 UNKNOWN, // 1D6A6..1D6A7
8750 COMMON, // 1D6A8..1D7CB
8751 UNKNOWN, // 1D7CC..1D7CD
8752 COMMON, // 1D7CE..1D7FF
8753 SIGNWRITING, // 1D800..1DA8B
8754 UNKNOWN, // 1DA8C..1DA9A
8755 SIGNWRITING, // 1DA9B..1DA9F
8756 UNKNOWN, // 1DAA0
8757 SIGNWRITING, // 1DAA1..1DAAF
8758 UNKNOWN, // 1DAB0..1DEFF
8759 LATIN, // 1DF00..1DF1E
8760 UNKNOWN, // 1DF1F..1DF24
8761 LATIN, // 1DF25..1DF2A
8762 UNKNOWN, // 1DF2B..1DFFF
8763 GLAGOLITIC, // 1E000..1E006
8764 UNKNOWN, // 1E007
8765 GLAGOLITIC, // 1E008..1E018
8766 UNKNOWN, // 1E019..1E01A
8767 GLAGOLITIC, // 1E01B..1E021
8768 UNKNOWN, // 1E022
8769 GLAGOLITIC, // 1E023..1E024
8770 UNKNOWN, // 1E025
8771 GLAGOLITIC, // 1E026..1E02A
8772 UNKNOWN, // 1E02B..1E02F
8773 CYRILLIC, // 1E030..1E06D
8774 UNKNOWN, // 1E06E..1E08E
8775 CYRILLIC, // 1E08F
8776 UNKNOWN, // 1E090..1E0FF
8777 NYIAKENG_PUACHUE_HMONG, // 1E100..1E12C
8778 UNKNOWN, // 1E12D..1E12F
8779 NYIAKENG_PUACHUE_HMONG, // 1E130..1E13D
8780 UNKNOWN, // 1E13E..1E13F
8781 NYIAKENG_PUACHUE_HMONG, // 1E140..1E149
8782 UNKNOWN, // 1E14A..1E14D
8783 NYIAKENG_PUACHUE_HMONG, // 1E14E..1E14F
8784 UNKNOWN, // 1E150..1E28F
8785 TOTO, // 1E290..1E2AE
8786 UNKNOWN, // 1E2AF..1E2BF
8787 WANCHO, // 1E2C0..1E2F9
8788 UNKNOWN, // 1E2FA..1E2FE
8789 WANCHO, // 1E2FF
8790 UNKNOWN, // 1E300..1E4CF
8791 NAG_MUNDARI, // 1E4D0..1E4F9
8792 UNKNOWN, // 1E4FA..1E5CF
8793 OL_ONAL, // 1E5D0..1E5FA
8794 UNKNOWN, // 1E5FB..1E5FE
8795 OL_ONAL, // 1E5FF
8796 UNKNOWN, // 1E600..1E7DF
8797 ETHIOPIC, // 1E7E0..1E7E6
8798 UNKNOWN, // 1E7E7
8799 ETHIOPIC, // 1E7E8..1E7EB
8800 UNKNOWN, // 1E7EC
8801 ETHIOPIC, // 1E7ED..1E7EE
8802 UNKNOWN, // 1E7EF
8803 ETHIOPIC, // 1E7F0..1E7FE
8804 UNKNOWN, // 1E7FF
8805 MENDE_KIKAKUI, // 1E800..1E8C4
8806 UNKNOWN, // 1E8C5..1E8C6
8807 MENDE_KIKAKUI, // 1E8C7..1E8D6
8808 UNKNOWN, // 1E8D7..1E8FF
8809 ADLAM, // 1E900..1E94B
8810 UNKNOWN, // 1E94C..1E94F
8811 ADLAM, // 1E950..1E959
8812 UNKNOWN, // 1E95A..1E95D
8813 ADLAM, // 1E95E..1E95F
8814 UNKNOWN, // 1E960..1EC70
8815 COMMON, // 1EC71..1ECB4
8816 UNKNOWN, // 1ECB5..1ED00
8817 COMMON, // 1ED01..1ED3D
8818 UNKNOWN, // 1ED3E..1EDFF
8819 ARABIC, // 1EE00..1EE03
8820 UNKNOWN, // 1EE04
8821 ARABIC, // 1EE05..1EE1F
8822 UNKNOWN, // 1EE20
8823 ARABIC, // 1EE21..1EE22
8824 UNKNOWN, // 1EE23
8825 ARABIC, // 1EE24
8826 UNKNOWN, // 1EE25..1EE26
8827 ARABIC, // 1EE27
8828 UNKNOWN, // 1EE28
8829 ARABIC, // 1EE29..1EE32
8830 UNKNOWN, // 1EE33
8831 ARABIC, // 1EE34..1EE37
8832 UNKNOWN, // 1EE38
8833 ARABIC, // 1EE39
8834 UNKNOWN, // 1EE3A
8835 ARABIC, // 1EE3B
8836 UNKNOWN, // 1EE3C..1EE41
8837 ARABIC, // 1EE42
8838 UNKNOWN, // 1EE43..1EE46
8839 ARABIC, // 1EE47
8840 UNKNOWN, // 1EE48
8841 ARABIC, // 1EE49
8842 UNKNOWN, // 1EE4A
8843 ARABIC, // 1EE4B
8844 UNKNOWN, // 1EE4C
8845 ARABIC, // 1EE4D..1EE4F
8846 UNKNOWN, // 1EE50
8847 ARABIC, // 1EE51..1EE52
8848 UNKNOWN, // 1EE53
8849 ARABIC, // 1EE54
8850 UNKNOWN, // 1EE55..1EE56
8851 ARABIC, // 1EE57
8852 UNKNOWN, // 1EE58
8853 ARABIC, // 1EE59
8854 UNKNOWN, // 1EE5A
8855 ARABIC, // 1EE5B
8856 UNKNOWN, // 1EE5C
8857 ARABIC, // 1EE5D
8858 UNKNOWN, // 1EE5E
8859 ARABIC, // 1EE5F
8860 UNKNOWN, // 1EE60
8861 ARABIC, // 1EE61..1EE62
8862 UNKNOWN, // 1EE63
8863 ARABIC, // 1EE64
8864 UNKNOWN, // 1EE65..1EE66
8865 ARABIC, // 1EE67..1EE6A
8866 UNKNOWN, // 1EE6B
8867 ARABIC, // 1EE6C..1EE72
8868 UNKNOWN, // 1EE73
8869 ARABIC, // 1EE74..1EE77
8870 UNKNOWN, // 1EE78
8871 ARABIC, // 1EE79..1EE7C
8872 UNKNOWN, // 1EE7D
8873 ARABIC, // 1EE7E
8874 UNKNOWN, // 1EE7F
8875 ARABIC, // 1EE80..1EE89
8876 UNKNOWN, // 1EE8A
8877 ARABIC, // 1EE8B..1EE9B
8878 UNKNOWN, // 1EE9C..1EEA0
8879 ARABIC, // 1EEA1..1EEA3
8880 UNKNOWN, // 1EEA4
8881 ARABIC, // 1EEA5..1EEA9
8882 UNKNOWN, // 1EEAA
8883 ARABIC, // 1EEAB..1EEBB
8884 UNKNOWN, // 1EEBC..1EEEF
8885 ARABIC, // 1EEF0..1EEF1
8886 UNKNOWN, // 1EEF2..1EFFF
8887 COMMON, // 1F000..1F02B
8888 UNKNOWN, // 1F02C..1F02F
8889 COMMON, // 1F030..1F093
8890 UNKNOWN, // 1F094..1F09F
8891 COMMON, // 1F0A0..1F0AE
8892 UNKNOWN, // 1F0AF..1F0B0
8893 COMMON, // 1F0B1..1F0BF
8894 UNKNOWN, // 1F0C0
8895 COMMON, // 1F0C1..1F0CF
8896 UNKNOWN, // 1F0D0
8897 COMMON, // 1F0D1..1F0F5
8898 UNKNOWN, // 1F0F6..1F0FF
8899 COMMON, // 1F100..1F1AD
8900 UNKNOWN, // 1F1AE..1F1E5
8901 COMMON, // 1F1E6..1F1FF
8902 HIRAGANA, // 1F200
8903 COMMON, // 1F201..1F202
8904 UNKNOWN, // 1F203..1F20F
8905 COMMON, // 1F210..1F23B
8906 UNKNOWN, // 1F23C..1F23F
8907 COMMON, // 1F240..1F248
8908 UNKNOWN, // 1F249..1F24F
8909 COMMON, // 1F250..1F251
8910 UNKNOWN, // 1F252..1F25F
8911 COMMON, // 1F260..1F265
8912 UNKNOWN, // 1F266..1F2FF
8913 COMMON, // 1F300..1F6D7
8914 UNKNOWN, // 1F6D8..1F6DB
8915 COMMON, // 1F6DC..1F6EC
8916 UNKNOWN, // 1F6ED..1F6EF
8917 COMMON, // 1F6F0..1F6FC
8918 UNKNOWN, // 1F6FD..1F6FF
8919 COMMON, // 1F700..1F776
8920 UNKNOWN, // 1F777..1F77A
8921 COMMON, // 1F77B..1F7D9
8922 UNKNOWN, // 1F7DA..1F7DF
8923 COMMON, // 1F7E0..1F7EB
8924 UNKNOWN, // 1F7EC..1F7EF
8925 COMMON, // 1F7F0
8926 UNKNOWN, // 1F7F1..1F7FF
8927 COMMON, // 1F800..1F80B
8928 UNKNOWN, // 1F80C..1F80F
8929 COMMON, // 1F810..1F847
8930 UNKNOWN, // 1F848..1F84F
8931 COMMON, // 1F850..1F859
8932 UNKNOWN, // 1F85A..1F85F
8933 COMMON, // 1F860..1F887
8934 UNKNOWN, // 1F888..1F88F
8935 COMMON, // 1F890..1F8AD
8936 UNKNOWN, // 1F8AE..1F8AF
8937 COMMON, // 1F8B0..1F8BB
8938 UNKNOWN, // 1F8BC..1F8BF
8939 COMMON, // 1F8C0..1F8C1
8940 UNKNOWN, // 1F8C2..1F8FF
8941 COMMON, // 1F900..1FA53
8942 UNKNOWN, // 1FA54..1FA5F
8943 COMMON, // 1FA60..1FA6D
8944 UNKNOWN, // 1FA6E..1FA6F
8945 COMMON, // 1FA70..1FA7C
8946 UNKNOWN, // 1FA7D..1FA7F
8947 COMMON, // 1FA80..1FA89
8948 UNKNOWN, // 1FA8A..1FA8E
8949 COMMON, // 1FA8F..1FAC6
8950 UNKNOWN, // 1FAC7..1FACD
8951 COMMON, // 1FACE..1FADC
8952 UNKNOWN, // 1FADD..1FADE
8953 COMMON, // 1FADF..1FAE9
8954 UNKNOWN, // 1FAEA..1FAEF
8955 COMMON, // 1FAF0..1FAF8
8956 UNKNOWN, // 1FAF9..1FAFF
8957 COMMON, // 1FB00..1FB92
8958 UNKNOWN, // 1FB93
8959 COMMON, // 1FB94..1FBF9
8960 UNKNOWN, // 1FBFA..1FFFF
8961 HAN, // 20000..2A6DF
8962 UNKNOWN, // 2A6E0..2A6FF
8963 HAN, // 2A700..2B739
8964 UNKNOWN, // 2B73A..2B73F
8965 HAN, // 2B740..2B81D
8966 UNKNOWN, // 2B81E..2B81F
8967 HAN, // 2B820..2CEA1
8968 UNKNOWN, // 2CEA2..2CEAF
8969 HAN, // 2CEB0..2EBE0
8970 UNKNOWN, // 2EBE1..2EBEF
8971 HAN, // 2EBF0..2EE5D
8972 UNKNOWN, // 2EE5E..2F7FF
8973 HAN, // 2F800..2FA1D
8974 UNKNOWN, // 2FA1E..2FFFF
8975 HAN, // 30000..3134A
8976 UNKNOWN, // 3134B..3134F
8977 HAN, // 31350..323AF
8978 UNKNOWN, // 323B0..E0000
8979 COMMON, // E0001
8980 UNKNOWN, // E0002..E001F
8981 COMMON, // E0020..E007F
8982 UNKNOWN, // E0080..E00FF
8983 INHERITED, // E0100..E01EF
8984 UNKNOWN, // E01F0..10FFFF
8985 };
8986
/**
 * Lookup table from four-letter, upper-case script alias to its
 * {@code UnicodeScript} constant. It is consulted by
 * {@link #forName(String)} before falling back to the enum names.
 */
private static final HashMap<String, Character.UnicodeScript> aliases;
static {
    // Fill a local table first and publish it to the final field once
    // every alias has been registered.
    HashMap<String, Character.UnicodeScript> table =
            HashMap.newHashMap(UNKNOWN.ordinal() + 1);
    table.put("ADLM", ADLAM);
    table.put("AGHB", CAUCASIAN_ALBANIAN);
    table.put("AHOM", AHOM);
    table.put("ARAB", ARABIC);
    table.put("ARMI", IMPERIAL_ARAMAIC);
    table.put("ARMN", ARMENIAN);
    table.put("AVST", AVESTAN);
    table.put("BALI", BALINESE);
    table.put("BAMU", BAMUM);
    table.put("BASS", BASSA_VAH);
    table.put("BATK", BATAK);
    table.put("BENG", BENGALI);
    table.put("BHKS", BHAIKSUKI);
    table.put("BOPO", BOPOMOFO);
    table.put("BRAH", BRAHMI);
    table.put("BRAI", BRAILLE);
    table.put("BUGI", BUGINESE);
    table.put("BUHD", BUHID);
    table.put("CAKM", CHAKMA);
    table.put("CANS", CANADIAN_ABORIGINAL);
    table.put("CARI", CARIAN);
    table.put("CHAM", CHAM);
    table.put("CHER", CHEROKEE);
    table.put("CHRS", CHORASMIAN);
    table.put("COPT", COPTIC);
    table.put("CPMN", CYPRO_MINOAN);
    table.put("CPRT", CYPRIOT);
    table.put("CYRL", CYRILLIC);
    table.put("DEVA", DEVANAGARI);
    table.put("DIAK", DIVES_AKURU);
    table.put("DOGR", DOGRA);
    table.put("DSRT", DESERET);
    table.put("DUPL", DUPLOYAN);
    table.put("EGYP", EGYPTIAN_HIEROGLYPHS);
    table.put("ELBA", ELBASAN);
    table.put("ELYM", ELYMAIC);
    table.put("ETHI", ETHIOPIC);
    table.put("GARA", GARAY);
    table.put("GEOR", GEORGIAN);
    table.put("GLAG", GLAGOLITIC);
    table.put("GONG", GUNJALA_GONDI);
    table.put("GONM", MASARAM_GONDI);
    table.put("GOTH", GOTHIC);
    table.put("GRAN", GRANTHA);
    table.put("GREK", GREEK);
    table.put("GUJR", GUJARATI);
    table.put("GUKH", GURUNG_KHEMA);
    table.put("GURU", GURMUKHI);
    table.put("HANG", HANGUL);
    table.put("HANI", HAN);
    table.put("HANO", HANUNOO);
    table.put("HATR", HATRAN);
    table.put("HEBR", HEBREW);
    table.put("HIRA", HIRAGANA);
    table.put("HLUW", ANATOLIAN_HIEROGLYPHS);
    table.put("HMNG", PAHAWH_HMONG);
    table.put("HMNP", NYIAKENG_PUACHUE_HMONG);
    table.put("HUNG", OLD_HUNGARIAN);
    table.put("ITAL", OLD_ITALIC);
    table.put("JAVA", JAVANESE);
    table.put("KALI", KAYAH_LI);
    table.put("KANA", KATAKANA);
    table.put("KAWI", KAWI);
    table.put("KHAR", KHAROSHTHI);
    table.put("KHMR", KHMER);
    table.put("KHOJ", KHOJKI);
    table.put("KITS", KHITAN_SMALL_SCRIPT);
    table.put("KNDA", KANNADA);
    table.put("KRAI", KIRAT_RAI);
    table.put("KTHI", KAITHI);
    table.put("LANA", TAI_THAM);
    table.put("LAOO", LAO);
    table.put("LATN", LATIN);
    table.put("LEPC", LEPCHA);
    table.put("LIMB", LIMBU);
    table.put("LINA", LINEAR_A);
    table.put("LINB", LINEAR_B);
    table.put("LISU", LISU);
    table.put("LYCI", LYCIAN);
    table.put("LYDI", LYDIAN);
    table.put("MAHJ", MAHAJANI);
    table.put("MAKA", MAKASAR);
    table.put("MAND", MANDAIC);
    table.put("MANI", MANICHAEAN);
    table.put("MARC", MARCHEN);
    table.put("MEDF", MEDEFAIDRIN);
    table.put("MEND", MENDE_KIKAKUI);
    table.put("MERC", MEROITIC_CURSIVE);
    table.put("MERO", MEROITIC_HIEROGLYPHS);
    table.put("MLYM", MALAYALAM);
    table.put("MODI", MODI);
    table.put("MONG", MONGOLIAN);
    table.put("MROO", MRO);
    table.put("MTEI", MEETEI_MAYEK);
    table.put("MULT", MULTANI);
    table.put("MYMR", MYANMAR);
    table.put("NAGM", NAG_MUNDARI);
    table.put("NAND", NANDINAGARI);
    table.put("NARB", OLD_NORTH_ARABIAN);
    table.put("NBAT", NABATAEAN);
    table.put("NEWA", NEWA);
    table.put("NKOO", NKO);
    table.put("NSHU", NUSHU);
    table.put("OGAM", OGHAM);
    table.put("OLCK", OL_CHIKI);
    table.put("ONAO", OL_ONAL);
    table.put("ORKH", OLD_TURKIC);
    table.put("ORYA", ORIYA);
    table.put("OSGE", OSAGE);
    table.put("OSMA", OSMANYA);
    table.put("OUGR", OLD_UYGHUR);
    table.put("PALM", PALMYRENE);
    table.put("PAUC", PAU_CIN_HAU);
    table.put("PERM", OLD_PERMIC);
    table.put("PHAG", PHAGS_PA);
    table.put("PHLI", INSCRIPTIONAL_PAHLAVI);
    table.put("PHLP", PSALTER_PAHLAVI);
    table.put("PHNX", PHOENICIAN);
    table.put("PLRD", MIAO);
    table.put("PRTI", INSCRIPTIONAL_PARTHIAN);
    table.put("RJNG", REJANG);
    table.put("ROHG", HANIFI_ROHINGYA);
    table.put("RUNR", RUNIC);
    table.put("SAMR", SAMARITAN);
    table.put("SARB", OLD_SOUTH_ARABIAN);
    table.put("SAUR", SAURASHTRA);
    table.put("SGNW", SIGNWRITING);
    table.put("SHAW", SHAVIAN);
    table.put("SHRD", SHARADA);
    table.put("SIDD", SIDDHAM);
    table.put("SIND", KHUDAWADI);
    table.put("SINH", SINHALA);
    table.put("SOGD", SOGDIAN);
    table.put("SOGO", OLD_SOGDIAN);
    table.put("SORA", SORA_SOMPENG);
    table.put("SOYO", SOYOMBO);
    table.put("SUND", SUNDANESE);
    table.put("SUNU", SUNUWAR);
    table.put("SYLO", SYLOTI_NAGRI);
    table.put("SYRC", SYRIAC);
    table.put("TAGB", TAGBANWA);
    table.put("TAKR", TAKRI);
    table.put("TALE", TAI_LE);
    table.put("TALU", NEW_TAI_LUE);
    table.put("TAML", TAMIL);
    table.put("TANG", TANGUT);
    table.put("TAVT", TAI_VIET);
    table.put("TELU", TELUGU);
    table.put("TFNG", TIFINAGH);
    table.put("TGLG", TAGALOG);
    table.put("THAA", THAANA);
    table.put("THAI", THAI);
    table.put("TIBT", TIBETAN);
    table.put("TIRH", TIRHUTA);
    table.put("TNSA", TANGSA);
    table.put("TODR", TODHRI);
    table.put("TOTO", TOTO);
    table.put("TUTG", TULU_TIGALARI);
    table.put("UGAR", UGARITIC);
    table.put("VAII", VAI);
    table.put("VITH", VITHKUQI);
    table.put("WARA", WARANG_CITI);
    table.put("WCHO", WANCHO);
    table.put("XPEO", OLD_PERSIAN);
    table.put("XSUX", CUNEIFORM);
    table.put("YEZI", YEZIDI);
    table.put("YIII", YI);
    table.put("ZANB", ZANABAZAR_SQUARE);
    table.put("ZINH", INHERITED);
    table.put("ZYYY", COMMON);
    table.put("ZZZZ", UNKNOWN);
    aliases = table;
}
9162
9163 /**
9164 * Returns the enum constant representing the Unicode script of which
9165 * the given character (Unicode code point) is assigned to.
9166 *
9167 * @param codePoint the character (Unicode code point) in question.
9168 * @return The {@code UnicodeScript} constant representing the
9169 * Unicode script of which this character is assigned to.
9170 *
9171 * @throws IllegalArgumentException if the specified
9172 * {@code codePoint} is an invalid Unicode code point.
9173 * @see Character#isValidCodePoint(int)
9174 *
9175 */
9176 public static UnicodeScript of(int codePoint) {
9177 if (!isValidCodePoint(codePoint))
9178 throw new IllegalArgumentException(
9179 String.format("Not a valid Unicode code point: 0x%X", codePoint));
9180 int type = getType(codePoint);
9181 // leave SURROGATE and PRIVATE_USE for table lookup
9182 if (type == UNASSIGNED)
9183 return UNKNOWN;
9184 int index = Arrays.binarySearch(scriptStarts, codePoint);
9185 if (index < 0)
9186 index = -index - 2;
9187 return scripts[index];
9188 }
9189
9190 /**
9191 * Returns the UnicodeScript constant with the given Unicode script
9192 * name or the script name alias. Script names and their aliases are
9193 * determined by The Unicode Standard. The files {@code Scripts.txt}
9194 * and {@code PropertyValueAliases.txt} define script names
9195 * and the script name aliases for a particular version of the
9196 * standard. The {@link Character} class specifies the version of
9197 * the standard that it supports.
9198 * <p>
9199 * Character case is ignored for all of the valid script names.
9200 * The en_US locale's case mapping rules are used to provide
9201 * case-insensitive string comparisons for script name validation.
9202 *
9203 * @param scriptName A {@code UnicodeScript} name.
9204 * @return The {@code UnicodeScript} constant identified
9205 * by {@code scriptName}
9206 * @throws IllegalArgumentException if {@code scriptName} is an
9207 * invalid name
9208 * @throws NullPointerException if {@code scriptName} is null
9209 */
9210 public static final UnicodeScript forName(String scriptName) {
9211 scriptName = scriptName.toUpperCase(Locale.ENGLISH);
9212 //.replace(' ', '_'));
9213 UnicodeScript sc = aliases.get(scriptName);
9214 if (sc != null)
9215 return sc;
9216 return valueOf(scriptName);
9217 }
9218 }
9219
9220 /**
9221 * The value of the {@code Character}.
9222 *
9223 * @serial
9224 */
9225 private final char value;
9226
9227 /** use serialVersionUID from JDK 1.0.2 for interoperability */
9228 @java.io.Serial
9229 private static final long serialVersionUID = 3786198910865385080L;
9230
/**
 * Constructs a newly allocated {@code Character} object that
 * wraps the specified {@code char} value.
 *
 * @param  value   the value to be represented by the
 *                  {@code Character} object.
 *
 * @deprecated
 * It is rarely appropriate to use this constructor. The static factory
 * {@link #valueOf(char)} is generally a better choice, as it is
 * likely to yield significantly better space and time performance.
 */
@Deprecated(since="9")
public Character(char value) {
    this.value = value;
}
9247
9248 private static final class CharacterCache {
9249 private CharacterCache(){}
9250
9251 @Stable
9252 static final Character[] cache;
9253 static Character[] archivedCache;
9254
9255 static {
9256 int size = 127 + 1;
9257
9258 // Load and use the archived cache if it exists
9259 CDS.initializeFromArchive(CharacterCache.class);
9260 if (archivedCache == null) {
9261 Character[] c = new Character[size];
9262 for (int i = 0; i < size; i++) {
9263 c[i] = new Character((char) i);
9264 }
9265 archivedCache = c;
9266 }
9267 cache = archivedCache;
9268 assert cache.length == size;
9269 }
9270 }
9271
9272 /**
9273 * Returns a {@code Character} instance representing the specified
9274 * {@code char} value.
9275 * <div class="preview-block">
9276 * <div class="preview-comment">
9277 * <p>
9278 * - When preview features are NOT enabled, {@code Character} is an identity class.
9279 * If a new {@code Character} instance is not required, this method
9280 * should generally be used in preference to the constructor
9281 * {@link #Character(char)}, as this method is likely to yield
9282 * significantly better space and time performance by caching
9283 * frequently requested values.
9284 * This method will always cache values in the range {@code
9285 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
9286 * cache other values outside of this range.
9287 * </p>
9288 * <p>
9289 * - When preview features are enabled, {@code Character} is a {@linkplain Class#isValue value class}.
9290 * The {@code valueOf} behavior is the same as invoking the constructor,
9291 * whether cached or not.
9292 * </p>
9293 * </div>
9294 * </div>
9295 *
9296 * @param c a char value.
9297 * @return a {@code Character} instance representing {@code c}.
9298 * @since 1.5
9299 */
9300 @IntrinsicCandidate
9301 @DeserializeConstructor
9302 public static Character valueOf(char c) {
9303 if (!PreviewFeatures.isEnabled()) {
9304 if (c <= 127) { // must cache
9305 return CharacterCache.cache[(int) c];
9306 }
9307 }
9308 return new Character(c);
9309 }
9310
9311 /**
9312 * Returns the value of this {@code Character} object.
9313 * @return the primitive {@code char} value represented by
9314 * this object.
9315 */
9316 @IntrinsicCandidate
9317 public char charValue() {
9318 return value;
9319 }
9320
9321 /**
9322 * Returns a hash code for this {@code Character}; equal to the result
9323 * of invoking {@code charValue()}.
9324 *
9325 * @return a hash code value for this {@code Character}
9326 */
9327 @Override
9328 public int hashCode() {
9329 return Character.hashCode(value);
9330 }
9331
9332 /**
9333 * Returns a hash code for a {@code char} value; compatible with
9334 * {@code Character.hashCode()}.
9335 *
9336 * @since 1.8
9337 *
9338 * @param value The {@code char} for which to return a hash code.
9339 * @return a hash code value for a {@code char} value.
9340 */
9341 public static int hashCode(char value) {
9342 return (int)value;
9343 }
9344
9345 /**
9346 * Compares this object against the specified object.
9347 * The result is {@code true} if and only if the argument is not
9348 * {@code null} and is a {@code Character} object that
9349 * represents the same {@code char} value as this object.
9350 *
9351 * @param obj the object to compare with.
9352 * @return {@code true} if the objects are the same;
9353 * {@code false} otherwise.
9354 */
9355 public boolean equals(Object obj) {
9356 if (obj instanceof Character c) {
9357 return value == c.charValue();
9358 }
9359 return false;
9360 }
9361
9362 /**
9363 * Returns a {@code String} object representing this
9364 * {@code Character}'s value. The result is a string of
9365 * length 1 whose sole component is the primitive
9366 * {@code char} value represented by this
9367 * {@code Character} object.
9368 *
9369 * @return a string representation of this object.
9370 */
9371 @Override
9372 public String toString() {
9373 return String.valueOf(value);
9374 }
9375
9376 /**
9377 * Returns a {@code String} object representing the
9378 * specified {@code char}. The result is a string of length
9379 * 1 consisting solely of the specified {@code char}.
9380 *
9381 * @apiNote This method cannot handle <a
9382 * href="#supplementary"> supplementary characters</a>. To support
9383 * all Unicode characters, including supplementary characters, use
9384 * the {@link #toString(int)} method.
9385 *
9386 * @param c the {@code char} to be converted
9387 * @return the string representation of the specified {@code char}
9388 * @since 1.4
9389 */
9390 public static String toString(char c) {
9391 return String.valueOf(c);
9392 }
9393
9394 /**
9395 * Returns a {@code String} object representing the
9396 * specified character (Unicode code point). The result is a string of
9397 * length 1 or 2, consisting solely of the specified {@code codePoint}.
9398 *
9399 * @param codePoint the {@code codePoint} to be converted
9400 * @return the string representation of the specified {@code codePoint}
9401 * @throws IllegalArgumentException if the specified
9402 * {@code codePoint} is not a {@linkplain #isValidCodePoint
9403 * valid Unicode code point}.
9404 * @since 11
9405 */
9406 public static String toString(int codePoint) {
9407 return String.valueOfCodePoint(codePoint);
9408 }
9409
9410 /**
9411 * Determines whether the specified code point is a valid
9412 * <a href="http://www.unicode.org/glossary/#code_point">
9413 * Unicode code point value</a>.
9414 *
9415 * @param codePoint the Unicode code point to be tested
9416 * @return {@code true} if the specified code point value is between
9417 * {@link #MIN_CODE_POINT} and
9418 * {@link #MAX_CODE_POINT} inclusive;
9419 * {@code false} otherwise.
9420 * @since 1.5
9421 */
9422 public static boolean isValidCodePoint(int codePoint) {
9423 // Optimized form of:
9424 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
9425 int plane = codePoint >>> 16;
9426 return plane < ((MAX_CODE_POINT + 1) >>> 16);
9427 }
9428
9429 /**
9430 * Determines whether the specified character (Unicode code point)
9431 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
9432 * Such code points can be represented using a single {@code char}.
9433 *
9434 * @param codePoint the character (Unicode code point) to be tested
9435 * @return {@code true} if the specified code point is between
9436 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
9437 * {@code false} otherwise.
9438 * @since 1.7
9439 */
9440 public static boolean isBmpCodePoint(int codePoint) {
9441 return codePoint >>> 16 == 0;
9442 // Optimized form of:
9443 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
9444 // We consistently use logical shift (>>>) to facilitate
9445 // additional runtime optimizations.
9446 }
9447
9448 /**
9449 * Determines whether the specified character (Unicode code point)
9450 * is in the <a href="#supplementary">supplementary character</a> range.
9451 *
9452 * @param codePoint the character (Unicode code point) to be tested
9453 * @return {@code true} if the specified code point is between
9454 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
9455 * {@link #MAX_CODE_POINT} inclusive;
9456 * {@code false} otherwise.
9457 * @since 1.5
9458 */
9459 public static boolean isSupplementaryCodePoint(int codePoint) {
9460 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
9461 && codePoint < MAX_CODE_POINT + 1;
9462 }
9463
9464 /**
9465 * Determines if the given {@code char} value is a
9466 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
9467 * Unicode high-surrogate code unit</a>
9468 * (also known as <i>leading-surrogate code unit</i>).
9469 *
9470 * <p>Such values do not represent characters by themselves,
9471 * but are used in the representation of
9472 * <a href="#supplementary">supplementary characters</a>
9473 * in the UTF-16 encoding.
9474 *
9475 * @param ch the {@code char} value to be tested.
9476 * @return {@code true} if the {@code char} value is between
9477 * {@link #MIN_HIGH_SURROGATE} and
9478 * {@link #MAX_HIGH_SURROGATE} inclusive;
9479 * {@code false} otherwise.
9480 * @see Character#isLowSurrogate(char)
9481 * @see Character.UnicodeBlock#of(int)
9482 * @since 1.5
9483 */
9484 public static boolean isHighSurrogate(char ch) {
9485 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
9486 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
9487 }
9488
9489 /**
9490 * Determines if the given {@code char} value is a
9491 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
9492 * Unicode low-surrogate code unit</a>
9493 * (also known as <i>trailing-surrogate code unit</i>).
9494 *
9495 * <p>Such values do not represent characters by themselves,
9496 * but are used in the representation of
9497 * <a href="#supplementary">supplementary characters</a>
9498 * in the UTF-16 encoding.
9499 *
9500 * @param ch the {@code char} value to be tested.
9501 * @return {@code true} if the {@code char} value is between
9502 * {@link #MIN_LOW_SURROGATE} and
9503 * {@link #MAX_LOW_SURROGATE} inclusive;
9504 * {@code false} otherwise.
9505 * @see Character#isHighSurrogate(char)
9506 * @since 1.5
9507 */
9508 public static boolean isLowSurrogate(char ch) {
9509 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
9510 }
9511
9512 /**
9513 * Determines if the given {@code char} value is a Unicode
9514 * <i>surrogate code unit</i>.
9515 *
9516 * <p>Such values do not represent characters by themselves,
9517 * but are used in the representation of
9518 * <a href="#supplementary">supplementary characters</a>
9519 * in the UTF-16 encoding.
9520 *
9521 * <p>A char value is a surrogate code unit if and only if it is either
9522 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
9523 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
9524 *
9525 * @param ch the {@code char} value to be tested.
9526 * @return {@code true} if the {@code char} value is between
9527 * {@link #MIN_SURROGATE} and
9528 * {@link #MAX_SURROGATE} inclusive;
9529 * {@code false} otherwise.
9530 * @since 1.7
9531 */
9532 public static boolean isSurrogate(char ch) {
9533 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
9534 }
9535
9536 /**
9537 * Determines whether the specified pair of {@code char}
9538 * values is a valid
9539 * <a href="http://www.unicode.org/glossary/#surrogate_pair">
9540 * Unicode surrogate pair</a>.
9541 *
9542 * <p>This method is equivalent to the expression:
9543 * <blockquote><pre>{@code
9544 * isHighSurrogate(high) && isLowSurrogate(low)
9545 * }</pre></blockquote>
9546 *
9547 * @param high the high-surrogate code value to be tested
9548 * @param low the low-surrogate code value to be tested
9549 * @return {@code true} if the specified high and
9550 * low-surrogate code values represent a valid surrogate pair;
9551 * {@code false} otherwise.
9552 * @since 1.5
9553 */
9554 public static boolean isSurrogatePair(char high, char low) {
9555 return isHighSurrogate(high) && isLowSurrogate(low);
9556 }
9557
9558 /**
9559 * Determines the number of {@code char} values needed to
9560 * represent the specified character (Unicode code point). If the
9561 * specified character is equal to or greater than 0x10000, then
9562 * the method returns 2. Otherwise, the method returns 1.
9563 *
9564 * <p>This method doesn't validate the specified character to be a
9565 * valid Unicode code point. The caller must validate the
9566 * character value using {@link #isValidCodePoint(int) isValidCodePoint}
9567 * if necessary.
9568 *
9569 * @param codePoint the character (Unicode code point) to be tested.
9570 * @return 2 if the character is a valid supplementary character; 1 otherwise.
9571 * @see Character#isSupplementaryCodePoint(int)
9572 * @since 1.5
9573 */
9574 public static int charCount(int codePoint) {
9575 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
9576 }
9577
9578 /**
9579 * Converts the specified surrogate pair to its supplementary code
9580 * point value. This method does not validate the specified
9581 * surrogate pair. The caller must validate it using {@link
9582 * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
9583 *
9584 * @param high the high-surrogate code unit
9585 * @param low the low-surrogate code unit
9586 * @return the supplementary code point composed from the
9587 * specified surrogate pair.
9588 * @since 1.5
9589 */
9590 public static int toCodePoint(char high, char low) {
9591 // Optimized form of:
9592 // return ((high - MIN_HIGH_SURROGATE) << 10)
9593 // + (low - MIN_LOW_SURROGATE)
9594 // + MIN_SUPPLEMENTARY_CODE_POINT;
9595 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
9596 - (MIN_HIGH_SURROGATE << 10)
9597 - MIN_LOW_SURROGATE);
9598 }
9599
9600 /**
9601 * Returns the code point at the given index of the
9602 * {@code CharSequence}. If the {@code char} value at
9603 * the given index in the {@code CharSequence} is in the
9604 * high-surrogate range, the following index is less than the
9605 * length of the {@code CharSequence}, and the
9606 * {@code char} value at the following index is in the
9607 * low-surrogate range, then the supplementary code point
9608 * corresponding to this surrogate pair is returned. Otherwise,
9609 * the {@code char} value at the given index is returned.
9610 *
9611 * @param seq a sequence of {@code char} values (Unicode code
9612 * units)
9613 * @param index the index to the {@code char} values (Unicode
9614 * code units) in {@code seq} to be converted
9615 * @return the Unicode code point at the given index
9616 * @throws NullPointerException if {@code seq} is null.
9617 * @throws IndexOutOfBoundsException if the value
9618 * {@code index} is negative or not less than
9619 * {@link CharSequence#length() seq.length()}.
9620 * @since 1.5
9621 */
9622 public static int codePointAt(CharSequence seq, int index) {
9623 char c1 = seq.charAt(index);
9624 if (isHighSurrogate(c1) && ++index < seq.length()) {
9625 char c2 = seq.charAt(index);
9626 if (isLowSurrogate(c2)) {
9627 return toCodePoint(c1, c2);
9628 }
9629 }
9630 return c1;
9631 }
9632
9633 /**
9634 * Returns the code point at the given index of the
9635 * {@code char} array. If the {@code char} value at
9636 * the given index in the {@code char} array is in the
9637 * high-surrogate range, the following index is less than the
9638 * length of the {@code char} array, and the
9639 * {@code char} value at the following index is in the
9640 * low-surrogate range, then the supplementary code point
9641 * corresponding to this surrogate pair is returned. Otherwise,
9642 * the {@code char} value at the given index is returned.
9643 *
9644 * @param a the {@code char} array
9645 * @param index the index to the {@code char} values (Unicode
9646 * code units) in the {@code char} array to be converted
9647 * @return the Unicode code point at the given index
9648 * @throws NullPointerException if {@code a} is null.
9649 * @throws IndexOutOfBoundsException if the value
9650 * {@code index} is negative or not less than
9651 * the length of the {@code char} array.
9652 * @since 1.5
9653 */
9654 public static int codePointAt(char[] a, int index) {
9655 return codePointAtImpl(a, index, a.length);
9656 }
9657
9658 /**
9659 * Returns the code point at the given index of the
9660 * {@code char} array, where only array elements with
9661 * {@code index} less than {@code limit} can be used. If
9662 * the {@code char} value at the given index in the
9663 * {@code char} array is in the high-surrogate range, the
9664 * following index is less than the {@code limit}, and the
9665 * {@code char} value at the following index is in the
9666 * low-surrogate range, then the supplementary code point
9667 * corresponding to this surrogate pair is returned. Otherwise,
9668 * the {@code char} value at the given index is returned.
9669 *
9670 * @param a the {@code char} array
9671 * @param index the index to the {@code char} values (Unicode
9672 * code units) in the {@code char} array to be converted
9673 * @param limit the index after the last array element that
9674 * can be used in the {@code char} array
9675 * @return the Unicode code point at the given index
9676 * @throws NullPointerException if {@code a} is null.
9677 * @throws IndexOutOfBoundsException if the {@code index}
9678 * argument is negative or not less than the {@code limit}
9679 * argument, or if the {@code limit} argument is negative or
9680 * greater than the length of the {@code char} array.
9681 * @since 1.5
9682 */
9683 public static int codePointAt(char[] a, int index, int limit) {
9684 if (index >= limit || index < 0 || limit > a.length) {
9685 throw new IndexOutOfBoundsException();
9686 }
9687 return codePointAtImpl(a, index, limit);
9688 }
9689
9690 // throws ArrayIndexOutOfBoundsException if index out of bounds
9691 static int codePointAtImpl(char[] a, int index, int limit) {
9692 char c1 = a[index];
9693 if (isHighSurrogate(c1) && ++index < limit) {
9694 char c2 = a[index];
9695 if (isLowSurrogate(c2)) {
9696 return toCodePoint(c1, c2);
9697 }
9698 }
9699 return c1;
9700 }
9701
9702 /**
9703 * Returns the code point preceding the given index of the
9704 * {@code CharSequence}. If the {@code char} value at
9705 * {@code (index - 1)} in the {@code CharSequence} is in
9706 * the low-surrogate range, {@code (index - 2)} is not
9707 * negative, and the {@code char} value at {@code (index - 2)}
9708 * in the {@code CharSequence} is in the
9709 * high-surrogate range, then the supplementary code point
9710 * corresponding to this surrogate pair is returned. Otherwise,
9711 * the {@code char} value at {@code (index - 1)} is
9712 * returned.
9713 *
9714 * @param seq the {@code CharSequence} instance
9715 * @param index the index following the code point that should be returned
9716 * @return the Unicode code point value before the given index.
9717 * @throws NullPointerException if {@code seq} is null.
9718 * @throws IndexOutOfBoundsException if the {@code index}
9719 * argument is less than 1 or greater than {@link
9720 * CharSequence#length() seq.length()}.
9721 * @since 1.5
9722 */
9723 public static int codePointBefore(CharSequence seq, int index) {
9724 char c2 = seq.charAt(--index);
9725 if (isLowSurrogate(c2) && index > 0) {
9726 char c1 = seq.charAt(--index);
9727 if (isHighSurrogate(c1)) {
9728 return toCodePoint(c1, c2);
9729 }
9730 }
9731 return c2;
9732 }
9733
9734 /**
9735 * Returns the code point preceding the given index of the
9736 * {@code char} array. If the {@code char} value at
9737 * {@code (index - 1)} in the {@code char} array is in
9738 * the low-surrogate range, {@code (index - 2)} is not
9739 * negative, and the {@code char} value at {@code (index - 2)}
9740 * in the {@code char} array is in the
9741 * high-surrogate range, then the supplementary code point
9742 * corresponding to this surrogate pair is returned. Otherwise,
9743 * the {@code char} value at {@code (index - 1)} is
9744 * returned.
9745 *
9746 * @param a the {@code char} array
9747 * @param index the index following the code point that should be returned
9748 * @return the Unicode code point value before the given index.
9749 * @throws NullPointerException if {@code a} is null.
9750 * @throws IndexOutOfBoundsException if the {@code index}
9751 * argument is less than 1 or greater than the length of the
9752 * {@code char} array
9753 * @since 1.5
9754 */
9755 public static int codePointBefore(char[] a, int index) {
9756 return codePointBeforeImpl(a, index, 0);
9757 }
9758
9759 /**
9760 * Returns the code point preceding the given index of the
9761 * {@code char} array, where only array elements with
9762 * {@code index} greater than or equal to {@code start}
9763 * can be used. If the {@code char} value at {@code (index - 1)}
9764 * in the {@code char} array is in the
9765 * low-surrogate range, {@code (index - 2)} is not less than
9766 * {@code start}, and the {@code char} value at
9767 * {@code (index - 2)} in the {@code char} array is in
9768 * the high-surrogate range, then the supplementary code point
9769 * corresponding to this surrogate pair is returned. Otherwise,
9770 * the {@code char} value at {@code (index - 1)} is
9771 * returned.
9772 *
9773 * @param a the {@code char} array
9774 * @param index the index following the code point that should be returned
9775 * @param start the index of the first array element in the
9776 * {@code char} array
9777 * @return the Unicode code point value before the given index.
9778 * @throws NullPointerException if {@code a} is null.
9779 * @throws IndexOutOfBoundsException if the {@code index}
9780 * argument is not greater than the {@code start} argument or
9781 * is greater than the length of the {@code char} array, or
9782 * if the {@code start} argument is negative or not less than
9783 * the length of the {@code char} array.
9784 * @since 1.5
9785 */
9786 public static int codePointBefore(char[] a, int index, int start) {
9787 if (index <= start || start < 0 || index > a.length) {
9788 throw new IndexOutOfBoundsException();
9789 }
9790 return codePointBeforeImpl(a, index, start);
9791 }
9792
9793 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
9794 static int codePointBeforeImpl(char[] a, int index, int start) {
9795 char c2 = a[--index];
9796 if (isLowSurrogate(c2) && index > start) {
9797 char c1 = a[--index];
9798 if (isHighSurrogate(c1)) {
9799 return toCodePoint(c1, c2);
9800 }
9801 }
9802 return c2;
9803 }
9804
9805 /**
9806 * Returns the leading surrogate (a
9807 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
9808 * high surrogate code unit</a>) of the
9809 * <a href="http://www.unicode.org/glossary/#surrogate_pair">
9810 * surrogate pair</a>
9811 * representing the specified supplementary character (Unicode
9812 * code point) in the UTF-16 encoding. If the specified character
9813 * is not a
9814 * <a href="Character.html#supplementary">supplementary character</a>,
9815 * an unspecified {@code char} is returned.
9816 *
9817 * <p>If
9818 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
9819 * is {@code true}, then
9820 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
9821 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
9822 * are also always {@code true}.
9823 *
9824 * @param codePoint a supplementary character (Unicode code point)
9825 * @return the leading surrogate code unit used to represent the
9826 * character in the UTF-16 encoding
9827 * @since 1.7
9828 */
9829 public static char highSurrogate(int codePoint) {
9830 return (char) ((codePoint >>> 10)
9831 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
9832 }
9833
9834 /**
9835 * Returns the trailing surrogate (a
9836 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
9837 * low surrogate code unit</a>) of the
9838 * <a href="http://www.unicode.org/glossary/#surrogate_pair">
9839 * surrogate pair</a>
9840 * representing the specified supplementary character (Unicode
9841 * code point) in the UTF-16 encoding. If the specified character
9842 * is not a
9843 * <a href="Character.html#supplementary">supplementary character</a>,
9844 * an unspecified {@code char} is returned.
9845 *
9846 * <p>If
9847 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
9848 * is {@code true}, then
9849 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
9850 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
9851 * are also always {@code true}.
9852 *
9853 * @param codePoint a supplementary character (Unicode code point)
9854 * @return the trailing surrogate code unit used to represent the
9855 * character in the UTF-16 encoding
9856 * @since 1.7
9857 */
9858 public static char lowSurrogate(int codePoint) {
9859 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
9860 }
9861
9862 /**
9863 * Converts the specified character (Unicode code point) to its
9864 * UTF-16 representation. If the specified code point is a BMP
9865 * (Basic Multilingual Plane or Plane 0) value, the same value is
9866 * stored in {@code dst[dstIndex]}, and 1 is returned. If the
9867 * specified code point is a supplementary character, its
9868 * surrogate values are stored in {@code dst[dstIndex]}
9869 * (high-surrogate) and {@code dst[dstIndex+1]}
9870 * (low-surrogate), and 2 is returned.
9871 *
9872 * @param codePoint the character (Unicode code point) to be converted.
9873 * @param dst an array of {@code char} in which the
9874 * {@code codePoint}'s UTF-16 value is stored.
9875 * @param dstIndex the start index into the {@code dst}
9876 * array where the converted value is stored.
9877 * @return 1 if the code point is a BMP code point, 2 if the
9878 * code point is a supplementary code point.
9879 * @throws IllegalArgumentException if the specified
9880 * {@code codePoint} is not a valid Unicode code point.
9881 * @throws NullPointerException if the specified {@code dst} is null.
9882 * @throws IndexOutOfBoundsException if {@code dstIndex}
9883 * is negative or not less than {@code dst.length}, or if
9884 * {@code dst} at {@code dstIndex} doesn't have enough
9885 * array element(s) to store the resulting {@code char}
9886 * value(s). (If {@code dstIndex} is equal to
9887 * {@code dst.length-1} and the specified
9888 * {@code codePoint} is a supplementary character, the
9889 * high-surrogate value is not stored in
9890 * {@code dst[dstIndex]}.)
9891 * @since 1.5
9892 */
9893 public static int toChars(int codePoint, char[] dst, int dstIndex) {
9894 if (isBmpCodePoint(codePoint)) {
9895 dst[dstIndex] = (char) codePoint;
9896 return 1;
9897 } else if (isValidCodePoint(codePoint)) {
9898 toSurrogates(codePoint, dst, dstIndex);
9899 return 2;
9900 } else {
9901 throw new IllegalArgumentException(
9902 String.format("Not a valid Unicode code point: 0x%X", codePoint));
9903 }
9904 }
9905
9906 /**
9907 * Converts the specified character (Unicode code point) to its
9908 * UTF-16 representation stored in a {@code char} array. If
9909 * the specified code point is a BMP (Basic Multilingual Plane or
9910 * Plane 0) value, the resulting {@code char} array has
9911 * the same value as {@code codePoint}. If the specified code
9912 * point is a supplementary code point, the resulting
9913 * {@code char} array has the corresponding surrogate pair.
9914 *
9915 * @param codePoint a Unicode code point
9916 * @return a {@code char} array having
9917 * {@code codePoint}'s UTF-16 representation.
9918 * @throws IllegalArgumentException if the specified
9919 * {@code codePoint} is not a valid Unicode code point.
9920 * @since 1.5
9921 */
9922 public static char[] toChars(int codePoint) {
9923 if (isBmpCodePoint(codePoint)) {
9924 return new char[] { (char) codePoint };
9925 } else if (isValidCodePoint(codePoint)) {
9926 char[] result = new char[2];
9927 toSurrogates(codePoint, result, 0);
9928 return result;
9929 } else {
9930 throw new IllegalArgumentException(
9931 String.format("Not a valid Unicode code point: 0x%X", codePoint));
9932 }
9933 }
9934
9935 static void toSurrogates(int codePoint, char[] dst, int index) {
9936 // We write elements "backwards" to guarantee all-or-nothing
9937 dst[index+1] = lowSurrogate(codePoint);
9938 dst[index] = highSurrogate(codePoint);
9939 }
9940
9941 /**
9942 * Returns the number of Unicode code points in the text range of
9943 * the specified char sequence. The text range begins at the
9944 * specified {@code beginIndex} and extends to the
9945 * {@code char} at index {@code endIndex - 1}. Thus the
9946 * length (in {@code char}s) of the text range is
9947 * {@code endIndex-beginIndex}. Unpaired surrogates within
9948 * the text range count as one code point each.
9949 *
9950 * @param seq the char sequence
9951 * @param beginIndex the index to the first {@code char} of
9952 * the text range.
9953 * @param endIndex the index after the last {@code char} of
9954 * the text range.
9955 * @return the number of Unicode code points in the specified text
9956 * range
9957 * @throws NullPointerException if {@code seq} is null.
9958 * @throws IndexOutOfBoundsException if the
9959 * {@code beginIndex} is negative, or {@code endIndex}
9960 * is larger than the length of the given sequence, or
9961 * {@code beginIndex} is larger than {@code endIndex}.
9962 * @since 1.5
9963 */
9964 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
9965 Objects.checkFromToIndex(beginIndex, endIndex, seq.length());
9966 int n = endIndex - beginIndex;
9967 for (int i = beginIndex; i < endIndex; ) {
9968 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
9969 isLowSurrogate(seq.charAt(i))) {
9970 n--;
9971 i++;
9972 }
9973 }
9974 return n;
9975 }
9976
9977 /**
9978 * Returns the number of Unicode code points in a subarray of the
9979 * {@code char} array argument. The {@code offset}
9980 * argument is the index of the first {@code char} of the
9981 * subarray and the {@code count} argument specifies the
9982 * length of the subarray in {@code char}s. Unpaired
9983 * surrogates within the subarray count as one code point each.
9984 *
9985 * @param a the {@code char} array
9986 * @param offset the index of the first {@code char} in the
9987 * given {@code char} array
9988 * @param count the length of the subarray in {@code char}s
9989 * @return the number of Unicode code points in the specified subarray
9990 * @throws NullPointerException if {@code a} is null.
9991 * @throws IndexOutOfBoundsException if {@code offset} or
9992 * {@code count} is negative, or if {@code offset +
9993 * count} is larger than the length of the given array.
9994 * @since 1.5
9995 */
9996 public static int codePointCount(char[] a, int offset, int count) {
9997 Objects.checkFromIndexSize(offset, count, a.length);
9998 return codePointCountImpl(a, offset, count);
9999 }
10000
10001 static int codePointCountImpl(char[] a, int offset, int count) {
10002 int endIndex = offset + count;
10003 int n = count;
10004 for (int i = offset; i < endIndex; ) {
10005 if (isHighSurrogate(a[i++]) && i < endIndex &&
10006 isLowSurrogate(a[i])) {
10007 n--;
10008 i++;
10009 }
10010 }
10011 return n;
10012 }
10013
10014 /**
10015 * Returns the index within the given char sequence that is offset
10016 * from the given {@code index} by {@code codePointOffset}
10017 * code points. Unpaired surrogates within the text range given by
10018 * {@code index} and {@code codePointOffset} count as
10019 * one code point each.
10020 *
10021 * @param seq the char sequence
10022 * @param index the index to be offset
10023 * @param codePointOffset the offset in code points
10024 * @return the index within the char sequence
10025 * @throws NullPointerException if {@code seq} is null.
10026 * @throws IndexOutOfBoundsException if {@code index}
10027 * is negative or larger than the length of the char sequence,
10028 * or if {@code codePointOffset} is positive and the
10029 * subsequence starting with {@code index} has fewer than
10030 * {@code codePointOffset} code points, or if
10031 * {@code codePointOffset} is negative and the subsequence
10032 * before {@code index} has fewer than the absolute value
10033 * of {@code codePointOffset} code points.
10034 * @since 1.5
10035 */
10036 public static int offsetByCodePoints(CharSequence seq, int index,
10037 int codePointOffset) {
10038 int length = seq.length();
10039 if (index < 0 || index > length) {
10040 throw new IndexOutOfBoundsException();
10041 }
10042
10043 int x = index;
10044 if (codePointOffset >= 0) {
10045 int i;
10046 for (i = 0; x < length && i < codePointOffset; i++) {
10047 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
10048 isLowSurrogate(seq.charAt(x))) {
10049 x++;
10050 }
10051 }
10052 if (i < codePointOffset) {
10053 throw new IndexOutOfBoundsException();
10054 }
10055 } else {
10056 int i;
10057 for (i = codePointOffset; x > 0 && i < 0; i++) {
10058 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
10059 isHighSurrogate(seq.charAt(x-1))) {
10060 x--;
10061 }
10062 }
10063 if (i < 0) {
10064 throw new IndexOutOfBoundsException();
10065 }
10066 }
10067 return x;
10068 }
10069
10070 /**
10071 * Returns the index within the given {@code char} subarray
10072 * that is offset from the given {@code index} by
10073 * {@code codePointOffset} code points. The
10074 * {@code start} and {@code count} arguments specify a
10075 * subarray of the {@code char} array. Unpaired surrogates
10076 * within the text range given by {@code index} and
10077 * {@code codePointOffset} count as one code point each.
10078 *
10079 * @param a the {@code char} array
10080 * @param start the index of the first {@code char} of the
10081 * subarray
10082 * @param count the length of the subarray in {@code char}s
10083 * @param index the index to be offset
10084 * @param codePointOffset the offset in code points
10085 * @return the index within the subarray
10086 * @throws NullPointerException if {@code a} is null.
10087 * @throws IndexOutOfBoundsException
10088 * if {@code start} or {@code count} is negative,
10089 * or if {@code start + count} is larger than the length of
10090 * the given array,
10091 * or if {@code index} is less than {@code start} or
10092 * larger than {@code start + count},
10093 * or if {@code codePointOffset} is positive and the text range
10094 * starting with {@code index} and ending with {@code start + count - 1}
10095 * has fewer than {@code codePointOffset} code
10096 * points,
10097 * or if {@code codePointOffset} is negative and the text range
10098 * starting with {@code start} and ending with {@code index - 1}
10099 * has fewer than the absolute value of
10100 * {@code codePointOffset} code points.
10101 * @since 1.5
10102 */
10103 public static int offsetByCodePoints(char[] a, int start, int count,
10104 int index, int codePointOffset) {
10105 if (count > a.length-start || start < 0 || count < 0
10106 || index < start || index > start+count) {
10107 throw new IndexOutOfBoundsException();
10108 }
10109 return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
10110 }
10111
10112 static int offsetByCodePointsImpl(char[]a, int start, int count,
10113 int index, int codePointOffset) {
10114 int x = index;
10115 if (codePointOffset >= 0) {
10116 int limit = start + count;
10117 int i;
10118 for (i = 0; x < limit && i < codePointOffset; i++) {
10119 if (isHighSurrogate(a[x++]) && x < limit &&
10120 isLowSurrogate(a[x])) {
10121 x++;
10122 }
10123 }
10124 if (i < codePointOffset) {
10125 throw new IndexOutOfBoundsException();
10126 }
10127 } else {
10128 int i;
10129 for (i = codePointOffset; x > start && i < 0; i++) {
10130 if (isLowSurrogate(a[--x]) && x > start &&
10131 isHighSurrogate(a[x-1])) {
10132 x--;
10133 }
10134 }
10135 if (i < 0) {
10136 throw new IndexOutOfBoundsException();
10137 }
10138 }
10139 return x;
10140 }
10141
10142 /**
10143 * Determines if the specified character is a lowercase character.
10144 * <p>
10145 * A character is lowercase if its general category type, provided
10146 * by {@code Character.getType(ch)}, is
10147 * {@code LOWERCASE_LETTER}, or it has contributory property
10148 * Other_Lowercase as defined by the Unicode Standard.
10149 * <p>
10150 * The following are examples of lowercase characters:
10151 * <blockquote><pre>
10152 * a b c d e f g h i j k l m n o p q r s t u v w x y z
10153 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
10154 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
10155 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
10156 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
10157 * </pre></blockquote>
10158 * <p> Many other Unicode characters are lowercase too.
10159 *
10160 * <p><b>Note:</b> This method cannot handle <a
10161 * href="#supplementary"> supplementary characters</a>. To support
10162 * all Unicode characters, including supplementary characters, use
10163 * the {@link #isLowerCase(int)} method.
10164 *
10165 * @param ch the character to be tested.
10166 * @return {@code true} if the character is lowercase;
10167 * {@code false} otherwise.
10168 * @see Character#isUpperCase(char)
10169 * @see Character#isTitleCase(char)
10170 * @see Character#toLowerCase(char)
10171 * @see Character#getType(char)
10172 */
10173 public static boolean isLowerCase(char ch) {
10174 return isLowerCase((int)ch);
10175 }
10176
10177 /**
10178 * Determines if the specified character (Unicode code point) is a
10179 * lowercase character.
10180 * <p>
10181 * A character is lowercase if its general category type, provided
10182 * by {@link Character#getType getType(codePoint)}, is
10183 * {@code LOWERCASE_LETTER}, or it has contributory property
10184 * Other_Lowercase as defined by the Unicode Standard.
10185 * <p>
10186 * The following are examples of lowercase characters:
10187 * <blockquote><pre>
10188 * a b c d e f g h i j k l m n o p q r s t u v w x y z
10189 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
10190 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
10191 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
10192 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
10193 * </pre></blockquote>
10194 * <p> Many other Unicode characters are lowercase too.
10195 *
10196 * @param codePoint the character (Unicode code point) to be tested.
10197 * @return {@code true} if the character is lowercase;
10198 * {@code false} otherwise.
10199 * @see Character#isUpperCase(int)
10200 * @see Character#isTitleCase(int)
10201 * @see Character#toLowerCase(int)
10202 * @see Character#getType(int)
10203 * @since 1.5
10204 */
10205 public static boolean isLowerCase(int codePoint) {
10206 return CharacterData.of(codePoint).isLowerCase(codePoint);
10207 }
10208
10209 /**
10210 * Determines if the specified character is an uppercase character.
10211 * <p>
10212 * A character is uppercase if its general category type, provided by
10213 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
10214 * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
10215 * <p>
10216 * The following are examples of uppercase characters:
10217 * <blockquote><pre>
10218 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
10219 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
10220 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
10221 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
10222 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
10223 * </pre></blockquote>
10224 * <p> Many other Unicode characters are uppercase too.
10225 *
10226 * <p><b>Note:</b> This method cannot handle <a
10227 * href="#supplementary"> supplementary characters</a>. To support
10228 * all Unicode characters, including supplementary characters, use
10229 * the {@link #isUpperCase(int)} method.
10230 *
10231 * @param ch the character to be tested.
10232 * @return {@code true} if the character is uppercase;
10233 * {@code false} otherwise.
10234 * @see Character#isLowerCase(char)
10235 * @see Character#isTitleCase(char)
10236 * @see Character#toUpperCase(char)
10237 * @see Character#getType(char)
10238 * @since 1.0
10239 */
10240 public static boolean isUpperCase(char ch) {
10241 return isUpperCase((int)ch);
10242 }
10243
10244 /**
10245 * Determines if the specified character (Unicode code point) is an uppercase character.
10246 * <p>
10247 * A character is uppercase if its general category type, provided by
10248 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
10249 * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
10250 * <p>
10251 * The following are examples of uppercase characters:
10252 * <blockquote><pre>
10253 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
10254 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
10255 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
10256 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
10257 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
10258 * </pre></blockquote>
10259 * <p> Many other Unicode characters are uppercase too.
10260 *
10261 * @param codePoint the character (Unicode code point) to be tested.
10262 * @return {@code true} if the character is uppercase;
10263 * {@code false} otherwise.
10264 * @see Character#isLowerCase(int)
10265 * @see Character#isTitleCase(int)
10266 * @see Character#toUpperCase(int)
10267 * @see Character#getType(int)
10268 * @since 1.5
10269 */
10270 public static boolean isUpperCase(int codePoint) {
10271 return CharacterData.of(codePoint).isUpperCase(codePoint);
10272 }
10273
10274 /**
10275 * Determines if the specified character is a titlecase character.
10276 * <p>
10277 * A character is a titlecase character if its general
10278 * category type, provided by {@code Character.getType(ch)},
10279 * is {@code TITLECASE_LETTER}.
10280 * <p>
10281 * Some characters look like pairs of Latin letters. For example, there
10282 * is an uppercase letter that looks like "LJ" and has a corresponding
10283 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
10284 * is the appropriate form to use when rendering a word in lowercase
10285 * with initial capitals, as for a book title.
10286 * <p>
10287 * These are some of the Unicode characters for which this method returns
10288 * {@code true}:
10289 * <ul>
10290 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
10291 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
10292 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
10293 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
10294 * </ul>
10295 * <p> Many other Unicode characters are titlecase too.
10296 *
10297 * <p><b>Note:</b> This method cannot handle <a
10298 * href="#supplementary"> supplementary characters</a>. To support
10299 * all Unicode characters, including supplementary characters, use
10300 * the {@link #isTitleCase(int)} method.
10301 *
10302 * @param ch the character to be tested.
10303 * @return {@code true} if the character is titlecase;
10304 * {@code false} otherwise.
10305 * @see Character#isLowerCase(char)
10306 * @see Character#isUpperCase(char)
10307 * @see Character#toTitleCase(char)
10308 * @see Character#getType(char)
10309 * @since 1.0.2
10310 */
10311 public static boolean isTitleCase(char ch) {
10312 return isTitleCase((int)ch);
10313 }
10314
10315 /**
10316 * Determines if the specified character (Unicode code point) is a titlecase character.
10317 * <p>
10318 * A character is a titlecase character if its general
10319 * category type, provided by {@link Character#getType(int) getType(codePoint)},
10320 * is {@code TITLECASE_LETTER}.
10321 * <p>
10322 * Some characters look like pairs of Latin letters. For example, there
10323 * is an uppercase letter that looks like "LJ" and has a corresponding
10324 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
10325 * is the appropriate form to use when rendering a word in lowercase
10326 * with initial capitals, as for a book title.
10327 * <p>
10328 * These are some of the Unicode characters for which this method returns
10329 * {@code true}:
10330 * <ul>
10331 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
10332 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
10333 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
10334 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
10335 * </ul>
10336 * <p> Many other Unicode characters are titlecase too.
10337 *
10338 * @param codePoint the character (Unicode code point) to be tested.
10339 * @return {@code true} if the character is titlecase;
10340 * {@code false} otherwise.
10341 * @see Character#isLowerCase(int)
10342 * @see Character#isUpperCase(int)
10343 * @see Character#toTitleCase(int)
10344 * @see Character#getType(int)
10345 * @since 1.5
10346 */
10347 public static boolean isTitleCase(int codePoint) {
10348 return getType(codePoint) == Character.TITLECASE_LETTER;
10349 }
10350
10351 /**
10352 * Determines if the specified character is a digit.
10353 * <p>
10354 * A character is a digit if its general category type, provided
10355 * by {@code Character.getType(ch)}, is
10356 * {@code DECIMAL_DIGIT_NUMBER}.
10357 * <p>
10358 * Some Unicode character ranges that contain digits:
10359 * <ul>
10360 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
10361 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
10362 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
10363 * Arabic-Indic digits
10364 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
10365 * Extended Arabic-Indic digits
10366 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
10367 * Devanagari digits
10368 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
10369 * Fullwidth digits
10370 * </ul>
10371 *
10372 * Many other character ranges contain digits as well.
10373 *
10374 * <p><b>Note:</b> This method cannot handle <a
10375 * href="#supplementary"> supplementary characters</a>. To support
10376 * all Unicode characters, including supplementary characters, use
10377 * the {@link #isDigit(int)} method.
10378 *
10379 * @param ch the character to be tested.
10380 * @return {@code true} if the character is a digit;
10381 * {@code false} otherwise.
10382 * @see Character#digit(char, int)
10383 * @see Character#forDigit(int, int)
10384 * @see Character#getType(char)
10385 */
10386 public static boolean isDigit(char ch) {
10387 return isDigit((int)ch);
10388 }
10389
10390 /**
10391 * Determines if the specified character (Unicode code point) is a digit.
10392 * <p>
10393 * A character is a digit if its general category type, provided
10394 * by {@link Character#getType(int) getType(codePoint)}, is
10395 * {@code DECIMAL_DIGIT_NUMBER}.
10396 * <p>
10397 * Some Unicode character ranges that contain digits:
10398 * <ul>
10399 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
10400 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
10401 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
10402 * Arabic-Indic digits
10403 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
10404 * Extended Arabic-Indic digits
10405 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
10406 * Devanagari digits
10407 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
10408 * Fullwidth digits
10409 * </ul>
10410 *
10411 * Many other character ranges contain digits as well.
10412 *
10413 * @param codePoint the character (Unicode code point) to be tested.
10414 * @return {@code true} if the character is a digit;
10415 * {@code false} otherwise.
10416 * @see Character#forDigit(int, int)
10417 * @see Character#getType(int)
10418 * @since 1.5
10419 */
10420 public static boolean isDigit(int codePoint) {
10421 return CharacterData.of(codePoint).isDigit(codePoint);
10422 }
10423
10424 /**
10425 * Determines if a character is defined in Unicode.
10426 * <p>
10427 * A character is defined if at least one of the following is true:
10428 * <ul>
10429 * <li>It has an entry in the UnicodeData file.
10430 * <li>It has a value in a range defined by the UnicodeData file.
10431 * </ul>
10432 *
10433 * <p><b>Note:</b> This method cannot handle <a
10434 * href="#supplementary"> supplementary characters</a>. To support
10435 * all Unicode characters, including supplementary characters, use
10436 * the {@link #isDefined(int)} method.
10437 *
10438 * @param ch the character to be tested
10439 * @return {@code true} if the character has a defined meaning
10440 * in Unicode; {@code false} otherwise.
10441 * @see Character#isDigit(char)
10442 * @see Character#isLetter(char)
10443 * @see Character#isLetterOrDigit(char)
10444 * @see Character#isLowerCase(char)
10445 * @see Character#isTitleCase(char)
10446 * @see Character#isUpperCase(char)
10447 * @since 1.0.2
10448 */
10449 public static boolean isDefined(char ch) {
10450 return isDefined((int)ch);
10451 }
10452
10453 /**
10454 * Determines if a character (Unicode code point) is defined in Unicode.
10455 * <p>
10456 * A character is defined if at least one of the following is true:
10457 * <ul>
10458 * <li>It has an entry in the UnicodeData file.
10459 * <li>It has a value in a range defined by the UnicodeData file.
10460 * </ul>
10461 *
10462 * @param codePoint the character (Unicode code point) to be tested.
10463 * @return {@code true} if the character has a defined meaning
10464 * in Unicode; {@code false} otherwise.
10465 * @see Character#isDigit(int)
10466 * @see Character#isLetter(int)
10467 * @see Character#isLetterOrDigit(int)
10468 * @see Character#isLowerCase(int)
10469 * @see Character#isTitleCase(int)
10470 * @see Character#isUpperCase(int)
10471 * @since 1.5
10472 */
10473 public static boolean isDefined(int codePoint) {
10474 return getType(codePoint) != Character.UNASSIGNED;
10475 }
10476
10477 /**
10478 * Determines if the specified character is a letter.
10479 * <p>
10480 * A character is considered to be a letter if its general
10481 * category type, provided by {@code Character.getType(ch)},
10482 * is any of the following:
10483 * <ul>
10484 * <li> {@code UPPERCASE_LETTER}
10485 * <li> {@code LOWERCASE_LETTER}
10486 * <li> {@code TITLECASE_LETTER}
10487 * <li> {@code MODIFIER_LETTER}
10488 * <li> {@code OTHER_LETTER}
10489 * </ul>
10490 *
10491 * Not all letters have case. Many characters are
10492 * letters but are neither uppercase nor lowercase nor titlecase.
10493 *
10494 * <p><b>Note:</b> This method cannot handle <a
10495 * href="#supplementary"> supplementary characters</a>. To support
10496 * all Unicode characters, including supplementary characters, use
10497 * the {@link #isLetter(int)} method.
10498 *
10499 * @param ch the character to be tested.
10500 * @return {@code true} if the character is a letter;
10501 * {@code false} otherwise.
10502 * @see Character#isDigit(char)
10503 * @see Character#isJavaIdentifierStart(char)
10504 * @see Character#isJavaLetter(char)
10505 * @see Character#isJavaLetterOrDigit(char)
10506 * @see Character#isLetterOrDigit(char)
10507 * @see Character#isLowerCase(char)
10508 * @see Character#isTitleCase(char)
10509 * @see Character#isUnicodeIdentifierStart(char)
10510 * @see Character#isUpperCase(char)
10511 */
10512 public static boolean isLetter(char ch) {
10513 return isLetter((int)ch);
10514 }
10515
10516 /**
10517 * Determines if the specified character (Unicode code point) is a letter.
10518 * <p>
10519 * A character is considered to be a letter if its general
10520 * category type, provided by {@link Character#getType(int) getType(codePoint)},
10521 * is any of the following:
10522 * <ul>
10523 * <li> {@code UPPERCASE_LETTER}
10524 * <li> {@code LOWERCASE_LETTER}
10525 * <li> {@code TITLECASE_LETTER}
10526 * <li> {@code MODIFIER_LETTER}
10527 * <li> {@code OTHER_LETTER}
10528 * </ul>
10529 *
10530 * Not all letters have case. Many characters are
10531 * letters but are neither uppercase nor lowercase nor titlecase.
10532 *
10533 * @param codePoint the character (Unicode code point) to be tested.
10534 * @return {@code true} if the character is a letter;
10535 * {@code false} otherwise.
10536 * @see Character#isDigit(int)
10537 * @see Character#isJavaIdentifierStart(int)
10538 * @see Character#isLetterOrDigit(int)
10539 * @see Character#isLowerCase(int)
10540 * @see Character#isTitleCase(int)
10541 * @see Character#isUnicodeIdentifierStart(int)
10542 * @see Character#isUpperCase(int)
10543 * @since 1.5
10544 */
10545 public static boolean isLetter(int codePoint) {
10546 return ((((1 << Character.UPPERCASE_LETTER) |
10547 (1 << Character.LOWERCASE_LETTER) |
10548 (1 << Character.TITLECASE_LETTER) |
10549 (1 << Character.MODIFIER_LETTER) |
10550 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
10551 != 0;
10552 }
10553
10554 /**
10555 * Determines if the specified character is a letter or digit.
10556 * <p>
10557 * A character is considered to be a letter or digit if either
10558 * {@code Character.isLetter(char ch)} or
10559 * {@code Character.isDigit(char ch)} returns
10560 * {@code true} for the character.
10561 *
10562 * <p><b>Note:</b> This method cannot handle <a
10563 * href="#supplementary"> supplementary characters</a>. To support
10564 * all Unicode characters, including supplementary characters, use
10565 * the {@link #isLetterOrDigit(int)} method.
10566 *
10567 * @param ch the character to be tested.
10568 * @return {@code true} if the character is a letter or digit;
10569 * {@code false} otherwise.
10570 * @see Character#isDigit(char)
10571 * @see Character#isJavaIdentifierPart(char)
10572 * @see Character#isJavaLetter(char)
10573 * @see Character#isJavaLetterOrDigit(char)
10574 * @see Character#isLetter(char)
10575 * @see Character#isUnicodeIdentifierPart(char)
10576 * @since 1.0.2
10577 */
10578 public static boolean isLetterOrDigit(char ch) {
10579 return isLetterOrDigit((int)ch);
10580 }
10581
10582 /**
10583 * Determines if the specified character (Unicode code point) is a letter or digit.
10584 * <p>
10585 * A character is considered to be a letter or digit if either
10586 * {@link #isLetter(int) isLetter(codePoint)} or
10587 * {@link #isDigit(int) isDigit(codePoint)} returns
10588 * {@code true} for the character.
10589 *
10590 * @param codePoint the character (Unicode code point) to be tested.
10591 * @return {@code true} if the character is a letter or digit;
10592 * {@code false} otherwise.
10593 * @see Character#isDigit(int)
10594 * @see Character#isJavaIdentifierPart(int)
10595 * @see Character#isLetter(int)
10596 * @see Character#isUnicodeIdentifierPart(int)
10597 * @since 1.5
10598 */
10599 public static boolean isLetterOrDigit(int codePoint) {
10600 return ((((1 << Character.UPPERCASE_LETTER) |
10601 (1 << Character.LOWERCASE_LETTER) |
10602 (1 << Character.TITLECASE_LETTER) |
10603 (1 << Character.MODIFIER_LETTER) |
10604 (1 << Character.OTHER_LETTER) |
10605 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
10606 != 0;
10607 }
10608
10609 /**
10610 * Determines if the specified character is permissible as the first
10611 * character in a Java identifier.
10612 * <p>
10613 * A character may start a Java identifier if and only if
10614 * one of the following conditions is true:
10615 * <ul>
10616 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10617 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
10618 * <li> {@code ch} is a currency symbol (such as {@code '$'})
10619 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
10620 * </ul>
10621 *
10622 * @param ch the character to be tested.
10623 * @return {@code true} if the character may start a Java
10624 * identifier; {@code false} otherwise.
10625 * @see Character#isJavaLetterOrDigit(char)
10626 * @see Character#isJavaIdentifierStart(char)
10627 * @see Character#isJavaIdentifierPart(char)
10628 * @see Character#isLetter(char)
10629 * @see Character#isLetterOrDigit(char)
10630 * @see Character#isUnicodeIdentifierStart(char)
10631 * @since 1.0.2
10632 * @deprecated Replaced by isJavaIdentifierStart(char).
10633 */
10634 @Deprecated(since="1.1")
10635 public static boolean isJavaLetter(char ch) {
10636 return isJavaIdentifierStart(ch);
10637 }
10638
10639 /**
10640 * Determines if the specified character may be part of a Java
10641 * identifier as other than the first character.
10642 * <p>
10643 * A character may be part of a Java identifier if and only if one
10644 * of the following conditions is true:
10645 * <ul>
10646 * <li> it is a letter
10647 * <li> it is a currency symbol (such as {@code '$'})
10648 * <li> it is a connecting punctuation character (such as {@code '_'})
10649 * <li> it is a digit
10650 * <li> it is a numeric letter (such as a Roman numeral character)
10651 * <li> it is a combining mark
10652 * <li> it is a non-spacing mark
10653 * <li> {@code isIdentifierIgnorable} returns
10654 * {@code true} for the character.
10655 * </ul>
10656 *
10657 * @param ch the character to be tested.
10658 * @return {@code true} if the character may be part of a
10659 * Java identifier; {@code false} otherwise.
10660 * @see Character#isJavaLetter(char)
10661 * @see Character#isJavaIdentifierStart(char)
10662 * @see Character#isJavaIdentifierPart(char)
10663 * @see Character#isLetter(char)
10664 * @see Character#isLetterOrDigit(char)
10665 * @see Character#isUnicodeIdentifierPart(char)
10666 * @see Character#isIdentifierIgnorable(char)
10667 * @since 1.0.2
10668 * @deprecated Replaced by isJavaIdentifierPart(char).
10669 */
10670 @Deprecated(since="1.1")
10671 public static boolean isJavaLetterOrDigit(char ch) {
10672 return isJavaIdentifierPart(ch);
10673 }
10674
10675 /**
10676 * Determines if the specified character (Unicode code point) is alphabetic.
10677 * <p>
10678 * A character is considered to be alphabetic if its general category type,
10679 * provided by {@link Character#getType(int) getType(codePoint)}, is any of
10680 * the following:
10681 * <ul>
10682 * <li> {@code UPPERCASE_LETTER}
10683 * <li> {@code LOWERCASE_LETTER}
10684 * <li> {@code TITLECASE_LETTER}
10685 * <li> {@code MODIFIER_LETTER}
10686 * <li> {@code OTHER_LETTER}
10687 * <li> {@code LETTER_NUMBER}
10688 * </ul>
10689 * or it has contributory property Other_Alphabetic as defined by the
10690 * Unicode Standard.
10691 *
10692 * @param codePoint the character (Unicode code point) to be tested.
10693 * @return {@code true} if the character is a Unicode alphabet
10694 * character, {@code false} otherwise.
10695 * @since 1.7
10696 */
10697 public static boolean isAlphabetic(int codePoint) {
10698 return (((((1 << Character.UPPERCASE_LETTER) |
10699 (1 << Character.LOWERCASE_LETTER) |
10700 (1 << Character.TITLECASE_LETTER) |
10701 (1 << Character.MODIFIER_LETTER) |
10702 (1 << Character.OTHER_LETTER) |
10703 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
10704 CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
10705 }
10706
10707 /**
10708 * Determines if the specified character (Unicode code point) is a CJKV
10709 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
10710 * the Unicode Standard.
10711 *
10712 * @param codePoint the character (Unicode code point) to be tested.
10713 * @return {@code true} if the character is a Unicode ideograph
10714 * character, {@code false} otherwise.
10715 * @since 1.7
10716 */
10717 public static boolean isIdeographic(int codePoint) {
10718 return CharacterData.of(codePoint).isIdeographic(codePoint);
10719 }
10720
10721 /**
10722 * Determines if the specified character is
10723 * permissible as the first character in a Java identifier.
10724 * <p>
10725 * A character may start a Java identifier if and only if
10726 * one of the following conditions is true:
10727 * <ul>
10728 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10729 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
10730 * <li> {@code ch} is a currency symbol (such as {@code '$'})
10731 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
10732 * </ul>
10733 *
10734 * <p><b>Note:</b> This method cannot handle <a
10735 * href="#supplementary"> supplementary characters</a>. To support
10736 * all Unicode characters, including supplementary characters, use
10737 * the {@link #isJavaIdentifierStart(int)} method.
10738 *
10739 * @param ch the character to be tested.
10740 * @return {@code true} if the character may start a Java identifier;
10741 * {@code false} otherwise.
10742 * @see Character#isJavaIdentifierPart(char)
10743 * @see Character#isLetter(char)
10744 * @see Character#isUnicodeIdentifierStart(char)
10745 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10746 * @since 1.1
10747 */
10748 @SuppressWarnings("doclint:reference") // cross-module links
10749 public static boolean isJavaIdentifierStart(char ch) {
10750 return isJavaIdentifierStart((int)ch);
10751 }
10752
10753 /**
10754 * Determines if the character (Unicode code point) is
10755 * permissible as the first character in a Java identifier.
10756 * <p>
10757 * A character may start a Java identifier if and only if
10758 * one of the following conditions is true:
10759 * <ul>
10760 * <li> {@link #isLetter(int) isLetter(codePoint)}
10761 * returns {@code true}
10762 * <li> {@link #getType(int) getType(codePoint)}
10763 * returns {@code LETTER_NUMBER}
10764 * <li> the referenced character is a currency symbol (such as {@code '$'})
10765 * <li> the referenced character is a connecting punctuation character
10766 * (such as {@code '_'}).
10767 * </ul>
10768 *
10769 * @param codePoint the character (Unicode code point) to be tested.
10770 * @return {@code true} if the character may start a Java identifier;
10771 * {@code false} otherwise.
10772 * @see Character#isJavaIdentifierPart(int)
10773 * @see Character#isLetter(int)
10774 * @see Character#isUnicodeIdentifierStart(int)
10775 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10776 * @since 1.5
10777 */
10778 @SuppressWarnings("doclint:reference") // cross-module links
10779 public static boolean isJavaIdentifierStart(int codePoint) {
10780 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
10781 }
10782
10783 /**
10784 * Determines if the specified character may be part of a Java
10785 * identifier as other than the first character.
10786 * <p>
10787 * A character may be part of a Java identifier if any of the following
10788 * conditions are true:
10789 * <ul>
10790 * <li> it is a letter
10791 * <li> it is a currency symbol (such as {@code '$'})
10792 * <li> it is a connecting punctuation character (such as {@code '_'})
10793 * <li> it is a digit
10794 * <li> it is a numeric letter (such as a Roman numeral character)
10795 * <li> it is a combining mark
10796 * <li> it is a non-spacing mark
10797 * <li> {@code isIdentifierIgnorable} returns
10798 * {@code true} for the character
10799 * </ul>
10800 *
10801 * <p><b>Note:</b> This method cannot handle <a
10802 * href="#supplementary"> supplementary characters</a>. To support
10803 * all Unicode characters, including supplementary characters, use
10804 * the {@link #isJavaIdentifierPart(int)} method.
10805 *
10806 * @param ch the character to be tested.
10807 * @return {@code true} if the character may be part of a
10808 * Java identifier; {@code false} otherwise.
10809 * @see Character#isIdentifierIgnorable(char)
10810 * @see Character#isJavaIdentifierStart(char)
10811 * @see Character#isLetterOrDigit(char)
10812 * @see Character#isUnicodeIdentifierPart(char)
10813 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10814 * @since 1.1
10815 */
10816 @SuppressWarnings("doclint:reference") // cross-module links
10817 public static boolean isJavaIdentifierPart(char ch) {
10818 return isJavaIdentifierPart((int)ch);
10819 }
10820
10821 /**
10822 * Determines if the character (Unicode code point) may be part of a Java
10823 * identifier as other than the first character.
10824 * <p>
10825 * A character may be part of a Java identifier if any of the following
10826 * conditions are true:
10827 * <ul>
10828 * <li> it is a letter
10829 * <li> it is a currency symbol (such as {@code '$'})
10830 * <li> it is a connecting punctuation character (such as {@code '_'})
10831 * <li> it is a digit
10832 * <li> it is a numeric letter (such as a Roman numeral character)
10833 * <li> it is a combining mark
10834 * <li> it is a non-spacing mark
10835 * <li> {@link #isIdentifierIgnorable(int)
10836 * isIdentifierIgnorable(codePoint)} returns {@code true} for
10837 * the code point
10838 * </ul>
10839 *
10840 * @param codePoint the character (Unicode code point) to be tested.
10841 * @return {@code true} if the character may be part of a
10842 * Java identifier; {@code false} otherwise.
10843 * @see Character#isIdentifierIgnorable(int)
10844 * @see Character#isJavaIdentifierStart(int)
10845 * @see Character#isLetterOrDigit(int)
10846 * @see Character#isUnicodeIdentifierPart(int)
10847 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10848 * @since 1.5
10849 */
10850 @SuppressWarnings("doclint:reference") // cross-module links
10851 public static boolean isJavaIdentifierPart(int codePoint) {
10852 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
10853 }
10854
10855 /**
10856 * Determines if the specified character is permissible as the
10857 * first character in a Unicode identifier.
10858 * <p>
10859 * A character may start a Unicode identifier if and only if
10860 * one of the following conditions is true:
10861 * <ul>
10862 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10863 * <li> {@link #getType(char) getType(ch)} returns
10864 * {@code LETTER_NUMBER}.
10865 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10866 * {@code Other_ID_Start}</a> character.
10867 * </ul>
10868 * <p>
10869 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10870 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10871 * with the following profile of UAX31:
10872 * <pre>
10873 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
10874 * </pre>
10875 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
10876 * compatibility.
10877 *
10878 * <p><b>Note:</b> This method cannot handle <a
10879 * href="#supplementary"> supplementary characters</a>. To support
10880 * all Unicode characters, including supplementary characters, use
10881 * the {@link #isUnicodeIdentifierStart(int)} method.
10882 *
10883 * @param ch the character to be tested.
10884 * @return {@code true} if the character may start a Unicode
10885 * identifier; {@code false} otherwise.
10886 *
10887 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
10888 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
10889 * @see Character#isJavaIdentifierStart(char)
10890 * @see Character#isLetter(char)
10891 * @see Character#isUnicodeIdentifierPart(char)
10892 * @since 1.1
10893 */
10894 public static boolean isUnicodeIdentifierStart(char ch) {
10895 return isUnicodeIdentifierStart((int)ch);
10896 }
10897
10898 /**
10899 * Determines if the specified character (Unicode code point) is permissible as the
10900 * first character in a Unicode identifier.
10901 * <p>
10902 * A character may start a Unicode identifier if and only if
10903 * one of the following conditions is true:
10904 * <ul>
10905 * <li> {@link #isLetter(int) isLetter(codePoint)}
10906 * returns {@code true}
10907 * <li> {@link #getType(int) getType(codePoint)}
10908 * returns {@code LETTER_NUMBER}.
10909 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10910 * {@code Other_ID_Start}</a> character.
10911 * </ul>
10912 * <p>
10913 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10914 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10915 * with the following profile of UAX31:
10916 * <pre>
10917 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
10918 * </pre>
10919 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
10920 * compatibility.
10921 *
10922 * @param codePoint the character (Unicode code point) to be tested.
10923 * @return {@code true} if the character may start a Unicode
10924 * identifier; {@code false} otherwise.
10925 *
10926 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
10927 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
10928 * @see Character#isJavaIdentifierStart(int)
10929 * @see Character#isLetter(int)
10930 * @see Character#isUnicodeIdentifierPart(int)
10931 * @since 1.5
10932 */
10933 public static boolean isUnicodeIdentifierStart(int codePoint) {
10934 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
10935 }
10936
10937 /**
10938 * Determines if the specified character may be part of a Unicode
10939 * identifier as other than the first character.
10940 * <p>
10941 * A character may be part of a Unicode identifier if and only if
10942 * one of the following statements is true:
10943 * <ul>
10944 * <li> it is a letter
10945 * <li> it is a connecting punctuation character (such as {@code '_'})
10946 * <li> it is a digit
10947 * <li> it is a numeric letter (such as a Roman numeral character)
10948 * <li> it is a combining mark
10949 * <li> it is a non-spacing mark
10950 * <li> {@code isIdentifierIgnorable} returns
10951 * {@code true} for this character.
10952 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10953 * {@code Other_ID_Start}</a> character.
10954 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
10955 * {@code Other_ID_Continue}</a> character.
10956 * </ul>
10957 * <p>
10958 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10959 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10960 * with the following profile of UAX31:
10961 * <pre>
10962 * Continue := Start + ID_Continue + ignorable
10963 * Medial := empty
10964 * ignorable := isIdentifierIgnorable(char) returns true for the character
10965 * </pre>
10966 * {@code ignorable} is added to {@code Continue} for backward
10967 * compatibility.
10968 *
10969 * <p><b>Note:</b> This method cannot handle <a
10970 * href="#supplementary"> supplementary characters</a>. To support
10971 * all Unicode characters, including supplementary characters, use
10972 * the {@link #isUnicodeIdentifierPart(int)} method.
10973 *
10974 * @param ch the character to be tested.
10975 * @return {@code true} if the character may be part of a
10976 * Unicode identifier; {@code false} otherwise.
10977 *
10978 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
10979 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
10980 * @see Character#isIdentifierIgnorable(char)
10981 * @see Character#isJavaIdentifierPart(char)
10982 * @see Character#isLetterOrDigit(char)
10983 * @see Character#isUnicodeIdentifierStart(char)
10984 * @since 1.1
10985 */
10986 public static boolean isUnicodeIdentifierPart(char ch) {
10987 return isUnicodeIdentifierPart((int)ch);
10988 }
10989
10990 /**
10991 * Determines if the specified character (Unicode code point) may be part of a Unicode
10992 * identifier as other than the first character.
10993 * <p>
10994 * A character may be part of a Unicode identifier if and only if
10995 * one of the following statements is true:
10996 * <ul>
10997 * <li> it is a letter
10998 * <li> it is a connecting punctuation character (such as {@code '_'})
10999 * <li> it is a digit
11000 * <li> it is a numeric letter (such as a Roman numeral character)
11001 * <li> it is a combining mark
11002 * <li> it is a non-spacing mark
11003 * <li> {@code isIdentifierIgnorable} returns
11004 * {@code true} for this character.
11005 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
11006 * {@code Other_ID_Start}</a> character.
11007 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
11008 * {@code Other_ID_Continue}</a> character.
11009 * </ul>
11010 * <p>
11011 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
11012 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
11013 * with the following profile of UAX31:
11014 * <pre>
11015 * Continue := Start + ID_Continue + ignorable
11016 * Medial := empty
11017 * ignorable := isIdentifierIgnorable(int) returns true for the character
11018 * </pre>
11019 * {@code ignorable} is added to {@code Continue} for backward
11020 * compatibility.
11021 *
11022 * @param codePoint the character (Unicode code point) to be tested.
11023 * @return {@code true} if the character may be part of a
11024 * Unicode identifier; {@code false} otherwise.
11025 *
11026 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
11027 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
11028 * @see Character#isIdentifierIgnorable(int)
11029 * @see Character#isJavaIdentifierPart(int)
11030 * @see Character#isLetterOrDigit(int)
11031 * @see Character#isUnicodeIdentifierStart(int)
11032 * @since 1.5
11033 */
11034 public static boolean isUnicodeIdentifierPart(int codePoint) {
11035 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
11036 }
11037
11038 /**
11039 * Determines if the specified character should be regarded as
11040 * an ignorable character in a Java identifier or a Unicode identifier.
11041 * <p>
11042 * The following Unicode characters are ignorable in a Java identifier
11043 * or a Unicode identifier:
11044 * <ul>
11045 * <li>ISO control characters that are not whitespace
11046 * <ul>
11047 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
11048 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
11049 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
11050 * </ul>
11051 *
11052 * <li>all characters that have the {@code FORMAT} general
11053 * category value
11054 * </ul>
11055 *
11056 * <p><b>Note:</b> This method cannot handle <a
11057 * href="#supplementary"> supplementary characters</a>. To support
11058 * all Unicode characters, including supplementary characters, use
11059 * the {@link #isIdentifierIgnorable(int)} method.
11060 *
11061 * @param ch the character to be tested.
11062 * @return {@code true} if the character is an ignorable control
11063 * character that may be part of a Java or Unicode identifier;
11064 * {@code false} otherwise.
11065 * @see Character#isJavaIdentifierPart(char)
11066 * @see Character#isUnicodeIdentifierPart(char)
11067 * @since 1.1
11068 */
11069 public static boolean isIdentifierIgnorable(char ch) {
11070 return isIdentifierIgnorable((int)ch);
11071 }
11072
11073 /**
11074 * Determines if the specified character (Unicode code point) should be regarded as
11075 * an ignorable character in a Java identifier or a Unicode identifier.
11076 * <p>
11077 * The following Unicode characters are ignorable in a Java identifier
11078 * or a Unicode identifier:
11079 * <ul>
11080 * <li>ISO control characters that are not whitespace
11081 * <ul>
11082 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
11083 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
11084 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
11085 * </ul>
11086 *
11087 * <li>all characters that have the {@code FORMAT} general
11088 * category value
11089 * </ul>
11090 *
11091 * @param codePoint the character (Unicode code point) to be tested.
11092 * @return {@code true} if the character is an ignorable control
11093 * character that may be part of a Java or Unicode identifier;
11094 * {@code false} otherwise.
11095 * @see Character#isJavaIdentifierPart(int)
11096 * @see Character#isUnicodeIdentifierPart(int)
11097 * @since 1.5
11098 */
11099 public static boolean isIdentifierIgnorable(int codePoint) {
11100 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
11101 }
11102
11103 /**
11104 * Determines if the specified character (Unicode code point) is an Emoji.
11105 * <p>
11106 * A character is considered to be an Emoji if and only if it has the {@code Emoji}
11107 * property, defined in
11108 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11109 * Unicode Emoji (Technical Standard #51)</a>.
11110 *
11111 * @param codePoint the character (Unicode code point) to be tested.
11112 * @return {@code true} if the character is an Emoji;
11113 * {@code false} otherwise.
11114 * @spec https://unicode.org/reports/tr51/ Unicode Emoji
11115 * @since 21
11116 */
11117 public static boolean isEmoji(int codePoint) {
11118 return CharacterData.of(codePoint).isEmoji(codePoint);
11119 }
11120
11121 /**
11122 * Determines if the specified character (Unicode code point) has the
11123 * Emoji Presentation property by default.
11124 * <p>
11125 * A character is considered to have the Emoji Presentation property if and
11126 * only if it has the {@code Emoji_Presentation} property, defined in
11127 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11128 * Unicode Emoji (Technical Standard #51)</a>.
11129 *
11130 * @param codePoint the character (Unicode code point) to be tested.
11131 * @return {@code true} if the character has the Emoji Presentation
11132 * property; {@code false} otherwise.
11133 * @spec https://unicode.org/reports/tr51/ Unicode Emoji
11134 * @since 21
11135 */
11136 public static boolean isEmojiPresentation(int codePoint) {
11137 return CharacterData.of(codePoint).isEmojiPresentation(codePoint);
11138 }
11139
11140 /**
11141 * Determines if the specified character (Unicode code point) is an
11142 * Emoji Modifier.
11143 * <p>
11144 * A character is considered to be an Emoji Modifier if and only if it has
11145 * the {@code Emoji_Modifier} property, defined in
11146 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11147 * Unicode Emoji (Technical Standard #51)</a>.
11148 *
11149 * @param codePoint the character (Unicode code point) to be tested.
11150 * @return {@code true} if the character is an Emoji Modifier;
11151 * {@code false} otherwise.
11152 * @spec https://unicode.org/reports/tr51/ Unicode Emoji
11153 * @since 21
11154 */
11155 public static boolean isEmojiModifier(int codePoint) {
11156 return CharacterData.of(codePoint).isEmojiModifier(codePoint);
11157 }
11158
11159 /**
11160 * Determines if the specified character (Unicode code point) is an
11161 * Emoji Modifier Base.
11162 * <p>
11163 * A character is considered to be an Emoji Modifier Base if and only if it has
11164 * the {@code Emoji_Modifier_Base} property, defined in
11165 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11166 * Unicode Emoji (Technical Standard #51)</a>.
11167 *
11168 * @param codePoint the character (Unicode code point) to be tested.
11169 * @return {@code true} if the character is an Emoji Modifier Base;
11170 * {@code false} otherwise.
11171 * @spec https://unicode.org/reports/tr51/ Unicode Emoji
11172 * @since 21
11173 */
11174 public static boolean isEmojiModifierBase(int codePoint) {
11175 return CharacterData.of(codePoint).isEmojiModifierBase(codePoint);
11176 }
11177
11178 /**
11179 * Determines if the specified character (Unicode code point) is an
11180 * Emoji Component.
11181 * <p>
11182 * A character is considered to be an Emoji Component if and only if it has
11183 * the {@code Emoji_Component} property, defined in
11184 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11185 * Unicode Emoji (Technical Standard #51)</a>.
11186 *
11187 * @param codePoint the character (Unicode code point) to be tested.
11188 * @return {@code true} if the character is an Emoji Component;
11189 * {@code false} otherwise.
11190 * @spec https://unicode.org/reports/tr51/ Unicode Emoji
11191 * @since 21
11192 */
11193 public static boolean isEmojiComponent(int codePoint) {
11194 return CharacterData.of(codePoint).isEmojiComponent(codePoint);
11195 }
11196
11197 /**
11198 * Determines if the specified character (Unicode code point) is
11199 * an Extended Pictographic.
11200 * <p>
11201 * A character is considered to be an Extended Pictographic if and only if it has
11202 * the {@code Extended_Pictographic} property, defined in
11203 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11204 * Unicode Emoji (Technical Standard #51)</a>.
11205 *
11206 * @param codePoint the character (Unicode code point) to be tested.
11207 * @return {@code true} if the character is an Extended Pictographic;
11208 * {@code false} otherwise.
11209 * @spec https://unicode.org/reports/tr51/ Unicode Emoji
11210 * @since 21
11211 */
11212 public static boolean isExtendedPictographic(int codePoint) {
11213 return CharacterData.of(codePoint).isExtendedPictographic(codePoint);
11214 }
11215
11216 /**
11217 * Converts the character argument to lowercase using case
11218 * mapping information from the UnicodeData file.
11219 * <p>
11220 * Note that
11221 * {@code Character.isLowerCase(Character.toLowerCase(ch))}
11222 * does not always return {@code true} for some ranges of
11223 * characters, particularly those that are symbols or ideographs.
11224 *
11225 * <p>In general, {@link String#toLowerCase()} should be used to map
11226 * characters to lowercase. {@code String} case mapping methods
11227 * have several benefits over {@code Character} case mapping methods.
11228 * {@code String} case mapping methods can perform locale-sensitive
11229 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11230 * the {@code Character} case mapping methods cannot.
11231 *
11232 * <p><b>Note:</b> This method cannot handle <a
11233 * href="#supplementary"> supplementary characters</a>. To support
11234 * all Unicode characters, including supplementary characters, use
11235 * the {@link #toLowerCase(int)} method.
11236 *
11237 * @param ch the character to be converted.
11238 * @return the lowercase equivalent of the character, if any;
11239 * otherwise, the character itself.
11240 * @see Character#isLowerCase(char)
11241 * @see String#toLowerCase()
11242 */
11243 public static char toLowerCase(char ch) {
11244 return (char)toLowerCase((int)ch);
11245 }
11246
11247 /**
11248 * Converts the character (Unicode code point) argument to
11249 * lowercase using case mapping information from the UnicodeData
11250 * file.
11251 *
11252 * <p> Note that
11253 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
11254 * does not always return {@code true} for some ranges of
11255 * characters, particularly those that are symbols or ideographs.
11256 *
11257 * <p>In general, {@link String#toLowerCase()} should be used to map
11258 * characters to lowercase. {@code String} case mapping methods
11259 * have several benefits over {@code Character} case mapping methods.
11260 * {@code String} case mapping methods can perform locale-sensitive
11261 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11262 * the {@code Character} case mapping methods cannot.
11263 *
11264 * @param codePoint the character (Unicode code point) to be converted.
11265 * @return the lowercase equivalent of the character (Unicode code
11266 * point), if any; otherwise, the character itself.
11267 * @see Character#isLowerCase(int)
11268 * @see String#toLowerCase()
11269 *
11270 * @since 1.5
11271 */
11272 public static int toLowerCase(int codePoint) {
11273 return CharacterData.of(codePoint).toLowerCase(codePoint);
11274 }
11275
11276 /**
11277 * Converts the character argument to uppercase using case mapping
11278 * information from the UnicodeData file.
11279 * <p>
11280 * Note that
11281 * {@code Character.isUpperCase(Character.toUpperCase(ch))}
11282 * does not always return {@code true} for some ranges of
11283 * characters, particularly those that are symbols or ideographs.
11284 *
11285 * <p>In general, {@link String#toUpperCase()} should be used to map
11286 * characters to uppercase. {@code String} case mapping methods
11287 * have several benefits over {@code Character} case mapping methods.
11288 * {@code String} case mapping methods can perform locale-sensitive
11289 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11290 * the {@code Character} case mapping methods cannot.
11291 *
11292 * <p><b>Note:</b> This method cannot handle <a
11293 * href="#supplementary"> supplementary characters</a>. To support
11294 * all Unicode characters, including supplementary characters, use
11295 * the {@link #toUpperCase(int)} method.
11296 *
11297 * @param ch the character to be converted.
11298 * @return the uppercase equivalent of the character, if any;
11299 * otherwise, the character itself.
11300 * @see Character#isUpperCase(char)
11301 * @see String#toUpperCase()
11302 */
11303 public static char toUpperCase(char ch) {
11304 return (char)toUpperCase((int)ch);
11305 }
11306
11307 /**
11308 * Converts the character (Unicode code point) argument to
11309 * uppercase using case mapping information from the UnicodeData
11310 * file.
11311 *
11312 * <p>Note that
11313 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
11314 * does not always return {@code true} for some ranges of
11315 * characters, particularly those that are symbols or ideographs.
11316 *
11317 * <p>In general, {@link String#toUpperCase()} should be used to map
11318 * characters to uppercase. {@code String} case mapping methods
11319 * have several benefits over {@code Character} case mapping methods.
11320 * {@code String} case mapping methods can perform locale-sensitive
11321 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11322 * the {@code Character} case mapping methods cannot.
11323 *
11324 * @param codePoint the character (Unicode code point) to be converted.
11325 * @return the uppercase equivalent of the character, if any;
11326 * otherwise, the character itself.
11327 * @see Character#isUpperCase(int)
11328 * @see String#toUpperCase()
11329 *
11330 * @since 1.5
11331 */
11332 public static int toUpperCase(int codePoint) {
11333 return CharacterData.of(codePoint).toUpperCase(codePoint);
11334 }
11335
11336 /**
11337 * Converts the character argument to titlecase using case mapping
11338 * information from the UnicodeData file. If a character has no
11339 * explicit titlecase mapping and is not itself a titlecase char
11340 * according to UnicodeData, then the uppercase mapping is
11341 * returned as an equivalent titlecase mapping. If the
11342 * {@code char} argument is already a titlecase
11343 * {@code char}, the same {@code char} value will be
11344 * returned.
11345 * <p>
11346 * Note that
11347 * {@code Character.isTitleCase(Character.toTitleCase(ch))}
11348 * does not always return {@code true} for some ranges of
11349 * characters.
11350 *
11351 * <p><b>Note:</b> This method cannot handle <a
11352 * href="#supplementary"> supplementary characters</a>. To support
11353 * all Unicode characters, including supplementary characters, use
11354 * the {@link #toTitleCase(int)} method.
11355 *
11356 * @param ch the character to be converted.
11357 * @return the titlecase equivalent of the character, if any;
11358 * otherwise, the character itself.
11359 * @see Character#isTitleCase(char)
11360 * @see Character#toLowerCase(char)
11361 * @see Character#toUpperCase(char)
11362 * @since 1.0.2
11363 */
11364 public static char toTitleCase(char ch) {
11365 return (char)toTitleCase((int)ch);
11366 }
11367
11368 /**
11369 * Converts the character (Unicode code point) argument to titlecase using case mapping
11370 * information from the UnicodeData file. If a character has no
11371 * explicit titlecase mapping and is not itself a titlecase char
11372 * according to UnicodeData, then the uppercase mapping is
11373 * returned as an equivalent titlecase mapping. If the
11374 * character argument is already a titlecase
11375 * character, the same character value will be
11376 * returned.
11377 *
11378 * <p>Note that
11379 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
11380 * does not always return {@code true} for some ranges of
11381 * characters.
11382 *
11383 * @param codePoint the character (Unicode code point) to be converted.
11384 * @return the titlecase equivalent of the character, if any;
11385 * otherwise, the character itself.
11386 * @see Character#isTitleCase(int)
11387 * @see Character#toLowerCase(int)
11388 * @see Character#toUpperCase(int)
11389 * @since 1.5
11390 */
11391 public static int toTitleCase(int codePoint) {
11392 return CharacterData.of(codePoint).toTitleCase(codePoint);
11393 }
11394
11395 /**
11396 * Returns the numeric value of the character {@code ch} in the
11397 * specified radix.
11398 * <p>
11399 * If the radix is not in the range {@code MIN_RADIX} ≤
11400 * {@code radix} ≤ {@code MAX_RADIX} or if the
11401 * value of {@code ch} is not a valid digit in the specified
11402 * radix, {@code -1} is returned. A character is a valid digit
11403 * if at least one of the following is true:
11404 * <ul>
11405 * <li>The method {@code isDigit} is {@code true} of the character
11406 * and the Unicode decimal digit value of the character (or its
11407 * single-character decomposition) is less than the specified radix.
11408 * In this case the decimal digit value is returned.
11409 * <li>The character is one of the uppercase Latin letters
11410 * {@code 'A'} through {@code 'Z'} and its code is less than
11411 * {@code radix + 'A' - 10}.
11412 * In this case, {@code ch - 'A' + 10}
11413 * is returned.
11414 * <li>The character is one of the lowercase Latin letters
11415 * {@code 'a'} through {@code 'z'} and its code is less than
11416 * {@code radix + 'a' - 10}.
11417 * In this case, {@code ch - 'a' + 10}
11418 * is returned.
11419 * <li>The character is one of the fullwidth uppercase Latin letters A
11420 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
11421 * and its code is less than
11422 * {@code radix + '\u005CuFF21' - 10}.
11423 * In this case, {@code ch - '\u005CuFF21' + 10}
11424 * is returned.
11425 * <li>The character is one of the fullwidth lowercase Latin letters a
11426 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
11427 * and its code is less than
11428 * {@code radix + '\u005CuFF41' - 10}.
11429 * In this case, {@code ch - '\u005CuFF41' + 10}
11430 * is returned.
11431 * </ul>
11432 *
11433 * <p><b>Note:</b> This method cannot handle <a
11434 * href="#supplementary"> supplementary characters</a>. To support
11435 * all Unicode characters, including supplementary characters, use
11436 * the {@link #digit(int, int)} method.
11437 *
11438 * @param ch the character to be converted.
11439 * @param radix the radix.
11440 * @return the numeric value represented by the character in the
11441 * specified radix.
11442 * @see Character#forDigit(int, int)
11443 * @see Character#isDigit(char)
11444 */
11445 public static int digit(char ch, int radix) {
11446 return digit((int)ch, radix);
11447 }
11448
11449 /**
11450 * Returns the numeric value of the specified character (Unicode
11451 * code point) in the specified radix.
11452 *
11453 * <p>If the radix is not in the range {@code MIN_RADIX} ≤
11454 * {@code radix} ≤ {@code MAX_RADIX} or if the
11455 * character is not a valid digit in the specified
11456 * radix, {@code -1} is returned. A character is a valid digit
11457 * if at least one of the following is true:
11458 * <ul>
11459 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
11460 * and the Unicode decimal digit value of the character (or its
11461 * single-character decomposition) is less than the specified radix.
11462 * In this case the decimal digit value is returned.
11463 * <li>The character is one of the uppercase Latin letters
11464 * {@code 'A'} through {@code 'Z'} and its code is less than
11465 * {@code radix + 'A' - 10}.
11466 * In this case, {@code codePoint - 'A' + 10}
11467 * is returned.
11468 * <li>The character is one of the lowercase Latin letters
11469 * {@code 'a'} through {@code 'z'} and its code is less than
11470 * {@code radix + 'a' - 10}.
11471 * In this case, {@code codePoint - 'a' + 10}
11472 * is returned.
11473 * <li>The character is one of the fullwidth uppercase Latin letters A
11474 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
11475 * and its code is less than
11476 * {@code radix + '\u005CuFF21' - 10}.
11477 * In this case,
11478 * {@code codePoint - '\u005CuFF21' + 10}
11479 * is returned.
11480 * <li>The character is one of the fullwidth lowercase Latin letters a
11481 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
11482 * and its code is less than
 * {@code radix + '\u005CuFF41' - 10}.
11484 * In this case,
11485 * {@code codePoint - '\u005CuFF41' + 10}
11486 * is returned.
11487 * </ul>
11488 *
11489 * @param codePoint the character (Unicode code point) to be converted.
11490 * @param radix the radix.
11491 * @return the numeric value represented by the character in the
11492 * specified radix.
11493 * @see Character#forDigit(int, int)
11494 * @see Character#isDigit(int)
11495 * @since 1.5
11496 */
11497 public static int digit(int codePoint, int radix) {
11498 return CharacterData.of(codePoint).digit(codePoint, radix);
11499 }
11500
11501 /**
11502 * Returns the {@code int} value that the specified Unicode
11503 * character represents. For example, the character
 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
 * an {@code int} with a value of 50.
11506 * <p>
11507 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
11508 * {@code '\u005Cu005A'}), lowercase
11509 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
11510 * full width variant ({@code '\u005CuFF21'} through
11511 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
11512 * {@code '\u005CuFF5A'}) forms have numeric values from 10
11513 * through 35. This is independent of the Unicode specification,
11514 * which does not assign numeric values to these {@code char}
11515 * values.
11516 * <p>
11517 * If the character does not have a numeric value, then -1 is returned.
11518 * If the character has a numeric value that cannot be represented as a
11519 * nonnegative integer (for example, a fractional value), then -2
11520 * is returned.
11521 *
11522 * <p><b>Note:</b> This method cannot handle <a
11523 * href="#supplementary"> supplementary characters</a>. To support
11524 * all Unicode characters, including supplementary characters, use
11525 * the {@link #getNumericValue(int)} method.
11526 *
11527 * @param ch the character to be converted.
11528 * @return the numeric value of the character, as a nonnegative {@code int}
11529 * value; -2 if the character has a numeric value but the value
 * cannot be represented as a nonnegative {@code int} value;
11531 * -1 if the character has no numeric value.
11532 * @see Character#forDigit(int, int)
11533 * @see Character#isDigit(char)
11534 * @since 1.1
11535 */
11536 public static int getNumericValue(char ch) {
11537 return getNumericValue((int)ch);
11538 }
11539
11540 /**
11541 * Returns the {@code int} value that the specified
11542 * character (Unicode code point) represents. For example, the character
11543 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
11544 * an {@code int} with a value of 50.
11545 * <p>
11546 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
11547 * {@code '\u005Cu005A'}), lowercase
11548 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
11549 * full width variant ({@code '\u005CuFF21'} through
11550 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
11551 * {@code '\u005CuFF5A'}) forms have numeric values from 10
11552 * through 35. This is independent of the Unicode specification,
11553 * which does not assign numeric values to these {@code char}
11554 * values.
11555 * <p>
11556 * If the character does not have a numeric value, then -1 is returned.
11557 * If the character has a numeric value that cannot be represented as a
11558 * nonnegative integer (for example, a fractional value), then -2
11559 * is returned.
11560 *
11561 * @param codePoint the character (Unicode code point) to be converted.
11562 * @return the numeric value of the character, as a nonnegative {@code int}
11563 * value; -2 if the character has a numeric value but the value
 * cannot be represented as a nonnegative {@code int} value;
11565 * -1 if the character has no numeric value.
11566 * @see Character#forDigit(int, int)
11567 * @see Character#isDigit(int)
11568 * @since 1.5
11569 */
11570 public static int getNumericValue(int codePoint) {
11571 return CharacterData.of(codePoint).getNumericValue(codePoint);
11572 }
11573
11574 /**
11575 * Determines if the specified character is ISO-LATIN-1 white space.
11576 * This method returns {@code true} for the following five
11577 * characters only:
11578 * <table class="striped">
11579 * <caption style="display:none">truechars</caption>
11580 * <thead>
11581 * <tr><th scope="col">Character
11582 * <th scope="col">Code
11583 * <th scope="col">Name
11584 * </thead>
11585 * <tbody>
11586 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td>
11587 * <td>{@code HORIZONTAL TABULATION}</td></tr>
11588 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td>
11589 * <td>{@code NEW LINE}</td></tr>
11590 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td>
11591 * <td>{@code FORM FEED}</td></tr>
11592 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td>
11593 * <td>{@code CARRIAGE RETURN}</td></tr>
11594 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td>
11595 * <td>{@code SPACE}</td></tr>
11596 * </tbody>
11597 * </table>
11598 *
11599 * @param ch the character to be tested.
11600 * @return {@code true} if the character is ISO-LATIN-1 white
11601 * space; {@code false} otherwise.
11602 * @see Character#isSpaceChar(char)
11603 * @see Character#isWhitespace(char)
11604 * @deprecated Replaced by isWhitespace(char).
11605 */
11606 @Deprecated(since="1.1")
11607 public static boolean isSpace(char ch) {
11608 return (ch <= 0x0020) &&
11609 (((((1L << 0x0009) |
11610 (1L << 0x000A) |
11611 (1L << 0x000C) |
11612 (1L << 0x000D) |
11613 (1L << 0x0020)) >> ch) & 1L) != 0);
11614 }
11615
11616
11617 /**
11618 * Determines if the specified character is a Unicode space character.
11619 * A character is considered to be a space character if and only if
11620 * it is specified to be a space character by the Unicode Standard. This
11621 * method returns true if the character's general category type is any of
11622 * the following:
11623 * <ul>
11624 * <li> {@code SPACE_SEPARATOR}
11625 * <li> {@code LINE_SEPARATOR}
11626 * <li> {@code PARAGRAPH_SEPARATOR}
11627 * </ul>
11628 *
11629 * <p><b>Note:</b> This method cannot handle <a
11630 * href="#supplementary"> supplementary characters</a>. To support
11631 * all Unicode characters, including supplementary characters, use
11632 * the {@link #isSpaceChar(int)} method.
11633 *
11634 * @param ch the character to be tested.
11635 * @return {@code true} if the character is a space character;
11636 * {@code false} otherwise.
11637 * @see Character#isWhitespace(char)
11638 * @since 1.1
11639 */
11640 public static boolean isSpaceChar(char ch) {
11641 return isSpaceChar((int)ch);
11642 }
11643
11644 /**
11645 * Determines if the specified character (Unicode code point) is a
11646 * Unicode space character. A character is considered to be a
11647 * space character if and only if it is specified to be a space
11648 * character by the Unicode Standard. This method returns true if
11649 * the character's general category type is any of the following:
11650 *
11651 * <ul>
11652 * <li> {@link #SPACE_SEPARATOR}
11653 * <li> {@link #LINE_SEPARATOR}
11654 * <li> {@link #PARAGRAPH_SEPARATOR}
11655 * </ul>
11656 *
11657 * @param codePoint the character (Unicode code point) to be tested.
11658 * @return {@code true} if the character is a space character;
11659 * {@code false} otherwise.
11660 * @see Character#isWhitespace(int)
11661 * @since 1.5
11662 */
11663 public static boolean isSpaceChar(int codePoint) {
11664 return ((((1 << Character.SPACE_SEPARATOR) |
11665 (1 << Character.LINE_SEPARATOR) |
11666 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
11667 != 0;
11668 }
11669
11670 /**
11671 * Determines if the specified character is white space according to Java.
11672 * A character is a Java whitespace character if and only if it satisfies
11673 * one of the following criteria:
11674 * <ul>
11675 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
11676 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
11677 * but is not also a non-breaking space ({@code '\u005Cu00A0'},
11678 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
11679 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
11680 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
11681 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
11682 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
11683 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
11684 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
11685 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
11686 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
11687 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
11688 * </ul>
11689 *
11690 * <p><b>Note:</b> This method cannot handle <a
11691 * href="#supplementary"> supplementary characters</a>. To support
11692 * all Unicode characters, including supplementary characters, use
11693 * the {@link #isWhitespace(int)} method.
11694 *
11695 * @param ch the character to be tested.
11696 * @return {@code true} if the character is a Java whitespace
11697 * character; {@code false} otherwise.
11698 * @see Character#isSpaceChar(char)
11699 * @since 1.1
11700 */
11701 public static boolean isWhitespace(char ch) {
11702 return isWhitespace((int)ch);
11703 }
11704
11705 /**
11706 * Determines if the specified character (Unicode code point) is
11707 * white space according to Java. A character is a Java
11708 * whitespace character if and only if it satisfies one of the
11709 * following criteria:
11710 * <ul>
11711 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
11712 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
11713 * but is not also a non-breaking space ({@code '\u005Cu00A0'},
11714 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
11715 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
11716 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
11717 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
11718 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
11719 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
11720 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
11721 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
11722 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
11723 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
11724 * </ul>
11725 *
11726 * @param codePoint the character (Unicode code point) to be tested.
11727 * @return {@code true} if the character is a Java whitespace
11728 * character; {@code false} otherwise.
11729 * @see Character#isSpaceChar(int)
11730 * @since 1.5
11731 */
11732 public static boolean isWhitespace(int codePoint) {
11733 return CharacterData.of(codePoint).isWhitespace(codePoint);
11734 }
11735
11736 /**
11737 * Determines if the specified character is an ISO control
11738 * character. A character is considered to be an ISO control
11739 * character if its code is in the range {@code '\u005Cu0000'}
11740 * through {@code '\u005Cu001F'} or in the range
11741 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
11742 *
11743 * <p><b>Note:</b> This method cannot handle <a
11744 * href="#supplementary"> supplementary characters</a>. To support
11745 * all Unicode characters, including supplementary characters, use
11746 * the {@link #isISOControl(int)} method.
11747 *
11748 * @param ch the character to be tested.
11749 * @return {@code true} if the character is an ISO control character;
11750 * {@code false} otherwise.
11751 *
11752 * @see Character#isSpaceChar(char)
11753 * @see Character#isWhitespace(char)
11754 * @since 1.1
11755 */
11756 public static boolean isISOControl(char ch) {
11757 return isISOControl((int)ch);
11758 }
11759
11760 /**
11761 * Determines if the referenced character (Unicode code point) is an ISO control
11762 * character. A character is considered to be an ISO control
11763 * character if its code is in the range {@code '\u005Cu0000'}
11764 * through {@code '\u005Cu001F'} or in the range
11765 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
11766 *
11767 * @param codePoint the character (Unicode code point) to be tested.
11768 * @return {@code true} if the character is an ISO control character;
11769 * {@code false} otherwise.
11770 * @see Character#isSpaceChar(int)
11771 * @see Character#isWhitespace(int)
11772 * @since 1.5
11773 */
11774 public static boolean isISOControl(int codePoint) {
11775 // Optimized form of:
11776 // (codePoint >= 0x00 && codePoint <= 0x1F) ||
11777 // (codePoint >= 0x7F && codePoint <= 0x9F);
11778 return codePoint <= 0x9F &&
11779 (codePoint >= 0x7F || (codePoint >>> 5 == 0));
11780 }
11781
11782 /**
11783 * Returns a value indicating a character's general category.
11784 *
11785 * <p><b>Note:</b> This method cannot handle <a
11786 * href="#supplementary"> supplementary characters</a>. To support
11787 * all Unicode characters, including supplementary characters, use
11788 * the {@link #getType(int)} method.
11789 *
11790 * @param ch the character to be tested.
11791 * @return a value of type {@code int} representing the
11792 * character's general category.
11793 * @see Character#COMBINING_SPACING_MARK
11794 * @see Character#CONNECTOR_PUNCTUATION
11795 * @see Character#CONTROL
11796 * @see Character#CURRENCY_SYMBOL
11797 * @see Character#DASH_PUNCTUATION
11798 * @see Character#DECIMAL_DIGIT_NUMBER
11799 * @see Character#ENCLOSING_MARK
11800 * @see Character#END_PUNCTUATION
11801 * @see Character#FINAL_QUOTE_PUNCTUATION
11802 * @see Character#FORMAT
11803 * @see Character#INITIAL_QUOTE_PUNCTUATION
11804 * @see Character#LETTER_NUMBER
11805 * @see Character#LINE_SEPARATOR
11806 * @see Character#LOWERCASE_LETTER
11807 * @see Character#MATH_SYMBOL
11808 * @see Character#MODIFIER_LETTER
11809 * @see Character#MODIFIER_SYMBOL
11810 * @see Character#NON_SPACING_MARK
11811 * @see Character#OTHER_LETTER
11812 * @see Character#OTHER_NUMBER
11813 * @see Character#OTHER_PUNCTUATION
11814 * @see Character#OTHER_SYMBOL
11815 * @see Character#PARAGRAPH_SEPARATOR
11816 * @see Character#PRIVATE_USE
11817 * @see Character#SPACE_SEPARATOR
11818 * @see Character#START_PUNCTUATION
11819 * @see Character#SURROGATE
11820 * @see Character#TITLECASE_LETTER
11821 * @see Character#UNASSIGNED
11822 * @see Character#UPPERCASE_LETTER
11823 * @since 1.1
11824 */
11825 public static int getType(char ch) {
11826 return getType((int)ch);
11827 }
11828
11829 /**
11830 * Returns a value indicating a character's general category.
11831 *
11832 * @param codePoint the character (Unicode code point) to be tested.
11833 * @return a value of type {@code int} representing the
11834 * character's general category.
11835 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
11836 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
11837 * @see Character#CONTROL CONTROL
11838 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
11839 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION
11840 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
11841 * @see Character#ENCLOSING_MARK ENCLOSING_MARK
11842 * @see Character#END_PUNCTUATION END_PUNCTUATION
11843 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
11844 * @see Character#FORMAT FORMAT
11845 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
11846 * @see Character#LETTER_NUMBER LETTER_NUMBER
11847 * @see Character#LINE_SEPARATOR LINE_SEPARATOR
11848 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER
11849 * @see Character#MATH_SYMBOL MATH_SYMBOL
11850 * @see Character#MODIFIER_LETTER MODIFIER_LETTER
11851 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
11852 * @see Character#NON_SPACING_MARK NON_SPACING_MARK
11853 * @see Character#OTHER_LETTER OTHER_LETTER
11854 * @see Character#OTHER_NUMBER OTHER_NUMBER
11855 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
11856 * @see Character#OTHER_SYMBOL OTHER_SYMBOL
11857 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
11858 * @see Character#PRIVATE_USE PRIVATE_USE
11859 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR
11860 * @see Character#START_PUNCTUATION START_PUNCTUATION
11861 * @see Character#SURROGATE SURROGATE
11862 * @see Character#TITLECASE_LETTER TITLECASE_LETTER
11863 * @see Character#UNASSIGNED UNASSIGNED
11864 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER
11865 * @since 1.5
11866 */
11867 public static int getType(int codePoint) {
11868 return CharacterData.of(codePoint).getType(codePoint);
11869 }
11870
11871 /**
11872 * Determines the character representation for a specific digit in
11873 * the specified radix. If the value of {@code radix} is not a
11874 * valid radix, or the value of {@code digit} is not a valid
11875 * digit in the specified radix, the null character
11876 * ({@code '\u005Cu0000'}) is returned.
11877 * <p>
11878 * The {@code radix} argument is valid if it is greater than or
11879 * equal to {@code MIN_RADIX} and less than or equal to
11880 * {@code MAX_RADIX}. The {@code digit} argument is valid if
11881 * {@code 0 <= digit < radix}.
11882 * <p>
11883 * If the digit is less than 10, then
11884 * {@code '0' + digit} is returned. Otherwise, the value
11885 * {@code 'a' + digit - 10} is returned.
11886 *
11887 * @param digit the number to convert to a character.
11888 * @param radix the radix.
11889 * @return the {@code char} representation of the specified digit
11890 * in the specified radix.
11891 * @see Character#MIN_RADIX
11892 * @see Character#MAX_RADIX
11893 * @see Character#digit(char, int)
11894 */
11895 public static char forDigit(int digit, int radix) {
11896 if ((digit >= radix) || (digit < 0)) {
11897 return '\0';
11898 }
11899 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
11900 return '\0';
11901 }
11902 if (digit < 10) {
11903 return (char)('0' + digit);
11904 }
11905 return (char)('a' - 10 + digit);
11906 }
11907
11908 /**
11909 * Returns the Unicode directionality property for the given
11910 * character. Character directionality is used to calculate the
11911 * visual ordering of text. The directionality value of undefined
11912 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
11913 *
11914 * <p><b>Note:</b> This method cannot handle <a
11915 * href="#supplementary"> supplementary characters</a>. To support
11916 * all Unicode characters, including supplementary characters, use
11917 * the {@link #getDirectionality(int)} method.
11918 *
11919 * @param ch {@code char} for which the directionality property
11920 * is requested.
11921 * @return the directionality property of the {@code char} value.
11922 *
11923 * @see Character#DIRECTIONALITY_UNDEFINED
11924 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
11925 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
11926 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
11927 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
11928 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
11929 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
11930 * @see Character#DIRECTIONALITY_ARABIC_NUMBER
11931 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
11932 * @see Character#DIRECTIONALITY_NONSPACING_MARK
11933 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
11934 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
11935 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
11936 * @see Character#DIRECTIONALITY_WHITESPACE
11937 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
11938 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
11939 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
11940 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
11941 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
11942 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
11943 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
11944 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
11945 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE
11946 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
11947 * @since 1.4
11948 */
11949 public static byte getDirectionality(char ch) {
11950 return getDirectionality((int)ch);
11951 }
11952
11953 /**
11954 * Returns the Unicode directionality property for the given
11955 * character (Unicode code point). Character directionality is
11956 * used to calculate the visual ordering of text. The
11957 * directionality value of an undefined character is {@link
11958 * #DIRECTIONALITY_UNDEFINED}.
11959 *
11960 * @param codePoint the character (Unicode code point) for which
11961 * the directionality property is requested.
11962 * @return the directionality property of the character.
11963 *
11964 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
11965 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
11966 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
11967 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
11968 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
11969 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
11970 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
11971 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
11972 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
11973 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
11974 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
11975 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
11976 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
11977 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
11978 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
11979 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
11980 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
11981 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
11982 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
11983 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
11984 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
11985 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
11986 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE
11987 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
11988 * @since 1.5
11989 */
11990 public static byte getDirectionality(int codePoint) {
11991 return CharacterData.of(codePoint).getDirectionality(codePoint);
11992 }
11993
11994 /**
11995 * Determines whether the character is mirrored according to the
11996 * Unicode specification. Mirrored characters should have their
11997 * glyphs horizontally mirrored when displayed in text that is
11998 * right-to-left. For example, {@code '\u005Cu0028'} LEFT
11999 * PARENTHESIS is semantically defined to be an <i>opening
12000 * parenthesis</i>. This will appear as a "(" in text that is
12001 * left-to-right but as a ")" in text that is right-to-left.
12002 *
12003 * <p><b>Note:</b> This method cannot handle <a
12004 * href="#supplementary"> supplementary characters</a>. To support
12005 * all Unicode characters, including supplementary characters, use
12006 * the {@link #isMirrored(int)} method.
12007 *
12008 * @param ch {@code char} for which the mirrored property is requested
12009 * @return {@code true} if the char is mirrored, {@code false}
12010 * if the {@code char} is not mirrored or is not defined.
12011 * @since 1.4
12012 */
12013 public static boolean isMirrored(char ch) {
12014 return isMirrored((int)ch);
12015 }
12016
12017 /**
12018 * Determines whether the specified character (Unicode code point)
12019 * is mirrored according to the Unicode specification. Mirrored
12020 * characters should have their glyphs horizontally mirrored when
12021 * displayed in text that is right-to-left. For example,
12022 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
12023 * defined to be an <i>opening parenthesis</i>. This will appear
12024 * as a "(" in text that is left-to-right but as a ")" in text
12025 * that is right-to-left.
12026 *
12027 * @param codePoint the character (Unicode code point) to be tested.
12028 * @return {@code true} if the character is mirrored, {@code false}
12029 * if the character is not mirrored or is not defined.
12030 * @since 1.5
12031 */
12032 public static boolean isMirrored(int codePoint) {
12033 return CharacterData.of(codePoint).isMirrored(codePoint);
12034 }
12035
12036 /**
12037 * Compares two {@code Character} objects numerically.
12038 *
12039 * @param anotherCharacter the {@code Character} to be compared.
12040 * @return the value {@code 0} if the argument {@code Character}
12041 * is equal to this {@code Character}; a value less than
12042 * {@code 0} if this {@code Character} is numerically less
12043 * than the {@code Character} argument; and a value greater than
12044 * {@code 0} if this {@code Character} is numerically greater
12045 * than the {@code Character} argument (unsigned comparison).
12046 * Note that this is strictly a numerical comparison; it is not
12047 * locale-dependent.
12048 * @since 1.2
12049 */
12050 public int compareTo(Character anotherCharacter) {
12051 return compare(this.value, anotherCharacter.value);
12052 }
12053
12054 /**
12055 * Compares two {@code char} values numerically.
12056 * The value returned is identical to what would be returned by:
12057 * <pre>
12058 * Character.valueOf(x).compareTo(Character.valueOf(y))
12059 * </pre>
12060 *
12061 * @param x the first {@code char} to compare
12062 * @param y the second {@code char} to compare
12063 * @return the value {@code 0} if {@code x == y};
12064 * a value less than {@code 0} if {@code x < y}; and
12065 * a value greater than {@code 0} if {@code x > y}
12066 * @since 1.7
12067 */
12068 public static int compare(char x, char y) {
12069 return x - y;
12070 }
12071
12072 /**
12073 * Converts the character (Unicode code point) argument to uppercase using
12074 * information from the UnicodeData file.
12075 *
12076 * @param codePoint the character (Unicode code point) to be converted.
12077 * @return either the uppercase equivalent of the character, if
12078 * any, or an error flag ({@code Character.ERROR})
12079 * that indicates that a 1:M {@code char} mapping exists.
12080 * @see Character#isLowerCase(char)
12081 * @see Character#isUpperCase(char)
12082 * @see Character#toLowerCase(char)
12083 * @see Character#toTitleCase(char)
12084 * @since 1.4
12085 */
12086 static int toUpperCaseEx(int codePoint) {
12087 assert isValidCodePoint(codePoint);
12088 return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
12089 }
12090
12091 /**
12092 * Converts the character (Unicode code point) argument to uppercase using case
12093 * mapping information from the SpecialCasing file in the Unicode
12094 * specification. If a character has no explicit uppercase
12095 * mapping, then the {@code char} itself is returned in the
12096 * {@code char[]}.
12097 *
12098 * @param codePoint the character (Unicode code point) to be converted.
12099 * @return a {@code char[]} with the uppercased character.
12100 * @since 1.4
12101 */
12102 static char[] toUpperCaseCharArray(int codePoint) {
12103 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
12104 assert isBmpCodePoint(codePoint);
12105 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
12106 }
12107
12108 /**
12109 * The number of bits used to represent a {@code char} value in unsigned
12110 * binary form, constant {@code 16}.
12111 *
12112 * @since 1.5
12113 */
12114 public static final int SIZE = 16;
12115
12116 /**
12117 * The number of bytes used to represent a {@code char} value in unsigned
12118 * binary form.
12119 *
12120 * @since 1.8
12121 */
12122 public static final int BYTES = SIZE / Byte.SIZE;
12123
12124 /**
12125 * Returns the value obtained by reversing the order of the bytes in the
12126 * specified {@code char} value.
12127 *
12128 * @param ch The {@code char} of which to reverse the byte order.
12129 * @return the value obtained by reversing (or, equivalently, swapping)
12130 * the bytes in the specified {@code char} value.
12131 * @since 1.5
12132 */
12133 @IntrinsicCandidate
12134 public static char reverseBytes(char ch) {
12135 return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
12136 }
12137
12138 /**
12139 * Returns the name of the specified character
12140 * {@code codePoint}, or null if the code point is
12141 * {@link #UNASSIGNED unassigned}.
12142 * <p>
12143 * If the specified character is not assigned a name by
12144 * the <i>UnicodeData</i> file (part of the Unicode Character
12145 * Database maintained by the Unicode Consortium), the returned
12146 * name is the same as the result of the expression:
12147 *
12148 * <blockquote>{@code
12149 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
12150 * + " "
12151 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12152 *
12153 * }</blockquote>
12154 *
12155 * For the {@code codePoint}s in the <i>UnicodeData</i> file, the name
12156 * returned by this method follows the naming scheme in the
12157 * "Unicode Name Property" section of the Unicode Standard. For other
12158 * code points, such as Hangul/Ideographs, the name generation rule above
12159 * differs from the one defined in the Unicode Standard.
12160 *
12161 * @param codePoint the character (Unicode code point)
12162 *
12163 * @return the name of the specified character, or null if
12164 * the code point is unassigned.
12165 *
12166 * @throws IllegalArgumentException if the specified
12167 * {@code codePoint} is not a valid Unicode
12168 * code point.
12169 *
12170 * @since 1.7
12171 */
12172 public static String getName(int codePoint) {
12173 if (!isValidCodePoint(codePoint)) {
12174 throw new IllegalArgumentException(
12175 String.format("Not a valid Unicode code point: 0x%X", codePoint));
12176 }
12177 String name = CharacterName.getInstance().getName(codePoint);
12178 if (name != null)
12179 return name;
12180 if (getType(codePoint) == UNASSIGNED)
12181 return null;
12182 UnicodeBlock block = UnicodeBlock.of(codePoint);
12183 if (block != null)
12184 return block.toString().replace('_', ' ') + " "
12185 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12186 // should never come here
12187 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12188 }
12189
12190 /**
12191 * Returns the code point value of the Unicode character specified by
12192 * the given character name.
12193 * <p>
12194 * If a character is not assigned a name by the <i>UnicodeData</i>
12195 * file (part of the Unicode Character Database maintained by the Unicode
12196 * Consortium), its name is defined as the result of the expression:
12197 *
12198 * <blockquote>{@code
12199 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
12200 * + " "
12201 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12202 *
12203 * }</blockquote>
12204 * <p>
12205 * The {@code name} matching is case insensitive, with any leading and
12206 * trailing whitespace character removed.
12207 *
12208 * For the code points in the <i>UnicodeData</i> file, this method
12209 * recognizes the name which conforms to the name defined in the
12210 * "Unicode Name Property" section in the Unicode Standard. For other
12211 * code points, this method recognizes the name generated with
12212 * {@link #getName(int)} method.
12213 *
12214 * @param name the character name
12215 *
12216 * @return the code point value of the character specified by its name.
12217 *
12218 * @throws IllegalArgumentException if the specified {@code name}
12219 * is not a valid character name.
12220 * @throws NullPointerException if {@code name} is {@code null}
12221 *
12222 * @since 9
12223 */
12224 public static int codePointOf(String name) {
12225 name = name.trim().toUpperCase(Locale.ROOT);
12226 int cp = CharacterName.getInstance().getCodePoint(name);
12227 if (cp != -1)
12228 return cp;
12229 try {
12230 int off = name.lastIndexOf(' ');
12231 if (off != -1) {
12232 cp = Integer.parseInt(name, off + 1, name.length(), 16);
12233 if (isValidCodePoint(cp) && name.equals(getName(cp)))
12234 return cp;
12235 }
12236 } catch (Exception x) {}
12237 throw new IllegalArgumentException("Unrecognized character name :" + name);
12238 }
12239 }