1 /* 2 * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26 package java.lang; 27 28 import jdk.internal.misc.CDS; 29 import jdk.internal.vm.annotation.AOTSafeClassInitializer; 30 import jdk.internal.vm.annotation.IntrinsicCandidate; 31 import jdk.internal.vm.annotation.Stable; 32 33 import java.lang.constant.Constable; 34 import java.lang.constant.DynamicConstantDesc; 35 import java.util.Arrays; 36 import java.util.HashMap; 37 import java.util.Locale; 38 import java.util.Map; 39 import java.util.Objects; 40 import java.util.Optional; 41 42 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST; 43 import static java.lang.constant.ConstantDescs.CD_char; 44 import static java.lang.constant.ConstantDescs.DEFAULT_NAME; 45 46 /** 47 * The {@code Character} class is the {@linkplain 48 * java.lang##wrapperClass wrapper class} for values of the primitive 49 * type {@code char}. An object of type {@code Character} contains a 50 * single field whose type is {@code char}. 51 * 52 * <p>In addition, this class provides a large number of static methods for 53 * determining a character's category (lowercase letter, digit, etc.) 54 * and for converting characters from uppercase to lowercase and vice 55 * versa. 56 * 57 * <h2><a id="conformance">Unicode Conformance</a></h2> 58 * <p> 59 * The fields and methods of class {@code Character} are defined in terms 60 * of character information from the Unicode Standard, specifically the 61 * <i>UnicodeData</i> file that is part of the Unicode Character Database. 62 * This file specifies properties including name and category for every 63 * assigned Unicode code point or character range. The file is available 64 * from the Unicode Consortium at 65 * <a href="http://www.unicode.org">http://www.unicode.org</a>. 66 * <p> 67 * Character information is based on the Unicode Standard, version 17.0. 68 * <p> 69 * The Java platform has supported different versions of the Unicode 70 * Standard over time. 
Upgrades to newer versions of the Unicode Standard 71 * occurred in the following Java releases, each indicating the new version: 72 * <table class="striped"> 73 * <caption style="display:none">Shows Java releases and supported Unicode versions</caption> 74 * <thead> 75 * <tr><th scope="col">Java release</th> 76 * <th scope="col">Unicode version</th></tr> 77 * </thead> 78 * <tbody> 79 * <tr><th scope="row" style="text-align:left">Java SE 26</th> 80 * <td>Unicode 17.0</td></tr> 81 * <tr><th scope="row" style="text-align:left">Java SE 24</th> 82 * <td>Unicode 16.0</td></tr> 83 * <tr><th scope="row" style="text-align:left">Java SE 22</th> 84 * <td>Unicode 15.1</td></tr> 85 * <tr><th scope="row" style="text-align:left">Java SE 20</th> 86 * <td>Unicode 15.0</td></tr> 87 * <tr><th scope="row" style="text-align:left">Java SE 19</th> 88 * <td>Unicode 14.0</td></tr> 89 * <tr><th scope="row" style="text-align:left">Java SE 15</th> 90 * <td>Unicode 13.0</td></tr> 91 * <tr><th scope="row" style="text-align:left">Java SE 13</th> 92 * <td>Unicode 12.1</td></tr> 93 * <tr><th scope="row" style="text-align:left">Java SE 12</th> 94 * <td>Unicode 11.0</td></tr> 95 * <tr><th scope="row" style="text-align:left">Java SE 11</th> 96 * <td>Unicode 10.0</td></tr> 97 * <tr><th scope="row" style="text-align:left">Java SE 9</th> 98 * <td>Unicode 8.0</td></tr> 99 * <tr><th scope="row" style="text-align:left">Java SE 8</th> 100 * <td>Unicode 6.2</td></tr> 101 * <tr><th scope="row" style="text-align:left">Java SE 7</th> 102 * <td>Unicode 6.0</td></tr> 103 * <tr><th scope="row" style="text-align:left">Java SE 5.0</th> 104 * <td>Unicode 4.0</td></tr> 105 * <tr><th scope="row" style="text-align:left">Java SE 1.4</th> 106 * <td>Unicode 3.0</td></tr> 107 * <tr><th scope="row" style="text-align:left">JDK 1.1</th> 108 * <td>Unicode 2.0</td></tr> 109 * <tr><th scope="row" style="text-align:left">JDK 1.0.2</th> 110 * <td>Unicode 1.1.5</td></tr> 111 * </tbody> 112 * </table> 113 * Variations from these 
base Unicode versions, such as recognized appendixes, 114 * are documented elsewhere. 115 * <h2><a id="unicode">Unicode Character Representations</a></h2> 116 * 117 * <p>The {@code char} data type (and therefore the value that a 118 * {@code Character} object encapsulates) are based on the 119 * original Unicode specification, which defined characters as 120 * fixed-width 16-bit entities. The Unicode Standard has since been 121 * changed to allow for characters whose representation requires more 122 * than 16 bits. The range of legal <em>code point</em>s is now 123 * U+0000 to U+10FFFF, known as 124 * <em><a href="https://www.unicode.org/glossary/#unicode_scalar_value"> 125 * Unicode scalar value</a></em>. 126 * 127 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is 128 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 129 * <a id="supplementary">Characters</a> whose code points are greater 130 * than U+FFFF are called <em>supplementary character</em>s. The Java 131 * platform uses the UTF-16 representation in {@code char} arrays and 132 * in the {@code String} and {@code StringBuffer} classes. In 133 * this representation, supplementary characters are represented as a pair 134 * of {@code char} values, the first from the <em>high-surrogates</em> 135 * range, (\uD800-\uDBFF), the second from the 136 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 137 * 138 * <p>A {@code char} value, therefore, represents Basic 139 * Multilingual Plane (BMP) code points, including the surrogate 140 * code points, or code units of the UTF-16 encoding. An 141 * {@code int} value represents all Unicode code points, 142 * including supplementary code points. The lower (least significant) 143 * 21 bits of {@code int} are used to represent Unicode code 144 * points and the upper (most significant) 11 bits must be zero. 
145 * Unless otherwise specified, the behavior with respect to 146 * supplementary characters and surrogate {@code char} values is 147 * as follows: 148 * 149 * <ul> 150 * <li>The methods that only accept a {@code char} value cannot support 151 * supplementary characters. They treat {@code char} values from the 152 * surrogate ranges as undefined characters. For example, 153 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 154 * this specific value if followed by any low-surrogate value in a string 155 * would represent a letter. 156 * 157 * <li>The methods that accept an {@code int} value support all 158 * Unicode characters, including supplementary characters. For 159 * example, {@code Character.isLetter(0x2F81A)} returns 160 * {@code true} because the code point value represents a letter 161 * (a CJK ideograph). 162 * </ul> 163 * 164 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 165 * used for character values in the range between U+0000 and U+10FFFF, 166 * and <em>Unicode code unit</em> is used for 16-bit 167 * {@code char} values that are code units of the <em>UTF-16</em> 168 * encoding. For more information on Unicode terminology, refer to the 169 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 170 * 171 * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a> 172 * class; programmers should treat instances that are 173 * {@linkplain #equals(Object) equal} as interchangeable and should not 174 * use instances for synchronization, or unpredictable behavior may 175 * occur. For example, in a future release, synchronization may fail. 
176 * 177 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 178 * @author Lee Boynton 179 * @author Guy Steele 180 * @author Akira Tanaka 181 * @author Martin Buchholz 182 * @author Ulf Zibis 183 * @since 1.0 184 */ 185 @jdk.internal.ValueBased 186 public final 187 class Character implements java.io.Serializable, Comparable<Character>, Constable { 188 /** 189 * The minimum radix available for conversion to and from strings. 190 * The constant value of this field is the smallest value permitted 191 * for the radix argument in radix-conversion methods such as the 192 * {@code digit} method, the {@code forDigit} method, and the 193 * {@code toString} method of class {@code Integer}. 194 * 195 * @see Character#digit(char, int) 196 * @see Character#forDigit(int, int) 197 * @see Integer#toString(int, int) 198 * @see Integer#valueOf(String) 199 */ 200 public static final int MIN_RADIX = 2; 201 202 /** 203 * The maximum radix available for conversion to and from strings. 204 * The constant value of this field is the largest value permitted 205 * for the radix argument in radix-conversion methods such as the 206 * {@code digit} method, the {@code forDigit} method, and the 207 * {@code toString} method of class {@code Integer}. 208 * 209 * @see Character#digit(char, int) 210 * @see Character#forDigit(int, int) 211 * @see Integer#toString(int, int) 212 * @see Integer#valueOf(String) 213 */ 214 public static final int MAX_RADIX = 36; 215 216 /** 217 * The constant value of this field is the smallest value of type 218 * {@code char}, {@code '\u005Cu0000'}. 219 * 220 * @since 1.0.2 221 */ 222 public static final char MIN_VALUE = '\u0000'; 223 224 /** 225 * The constant value of this field is the largest value of type 226 * {@code char}, {@code '\u005CuFFFF'}. 227 * 228 * @since 1.0.2 229 */ 230 public static final char MAX_VALUE = '\uFFFF'; 231 232 /** 233 * The {@code Class} instance representing the primitive type 234 * {@code char}. 
235 * 236 * @since 1.1 237 */ 238 public static final Class<Character> TYPE = Class.getPrimitiveClass("char"); 239 240 /* 241 * Normative general types 242 */ 243 244 /* 245 * General character types 246 */ 247 248 /** 249 * General category "Cn" in the Unicode specification. 250 * @since 1.1 251 */ 252 public static final byte UNASSIGNED = 0; 253 254 /** 255 * General category "Lu" in the Unicode specification. 256 * @since 1.1 257 */ 258 public static final byte UPPERCASE_LETTER = 1; 259 260 /** 261 * General category "Ll" in the Unicode specification. 262 * @since 1.1 263 */ 264 public static final byte LOWERCASE_LETTER = 2; 265 266 /** 267 * General category "Lt" in the Unicode specification. 268 * @since 1.1 269 */ 270 public static final byte TITLECASE_LETTER = 3; 271 272 /** 273 * General category "Lm" in the Unicode specification. 274 * @since 1.1 275 */ 276 public static final byte MODIFIER_LETTER = 4; 277 278 /** 279 * General category "Lo" in the Unicode specification. 280 * @since 1.1 281 */ 282 public static final byte OTHER_LETTER = 5; 283 284 /** 285 * General category "Mn" in the Unicode specification. 286 * @since 1.1 287 */ 288 public static final byte NON_SPACING_MARK = 6; 289 290 /** 291 * General category "Me" in the Unicode specification. 292 * @since 1.1 293 */ 294 public static final byte ENCLOSING_MARK = 7; 295 296 /** 297 * General category "Mc" in the Unicode specification. 298 * @since 1.1 299 */ 300 public static final byte COMBINING_SPACING_MARK = 8; 301 302 /** 303 * General category "Nd" in the Unicode specification. 304 * @since 1.1 305 */ 306 public static final byte DECIMAL_DIGIT_NUMBER = 9; 307 308 /** 309 * General category "Nl" in the Unicode specification. 310 * @since 1.1 311 */ 312 public static final byte LETTER_NUMBER = 10; 313 314 /** 315 * General category "No" in the Unicode specification. 
316 * @since 1.1 317 */ 318 public static final byte OTHER_NUMBER = 11; 319 320 /** 321 * General category "Zs" in the Unicode specification. 322 * @since 1.1 323 */ 324 public static final byte SPACE_SEPARATOR = 12; 325 326 /** 327 * General category "Zl" in the Unicode specification. 328 * @since 1.1 329 */ 330 public static final byte LINE_SEPARATOR = 13; 331 332 /** 333 * General category "Zp" in the Unicode specification. 334 * @since 1.1 335 */ 336 public static final byte PARAGRAPH_SEPARATOR = 14; 337 338 /** 339 * General category "Cc" in the Unicode specification. 340 * @since 1.1 341 */ 342 public static final byte CONTROL = 15; 343 344 /** 345 * General category "Cf" in the Unicode specification. 346 * @since 1.1 347 */ 348 public static final byte FORMAT = 16; 349 350 /** 351 * General category "Co" in the Unicode specification. 352 * @since 1.1 353 */ 354 public static final byte PRIVATE_USE = 18; 355 356 /** 357 * General category "Cs" in the Unicode specification. 358 * @since 1.1 359 */ 360 public static final byte SURROGATE = 19; 361 362 /** 363 * General category "Pd" in the Unicode specification. 364 * @since 1.1 365 */ 366 public static final byte DASH_PUNCTUATION = 20; 367 368 /** 369 * General category "Ps" in the Unicode specification. 370 * @since 1.1 371 */ 372 public static final byte START_PUNCTUATION = 21; 373 374 /** 375 * General category "Pe" in the Unicode specification. 376 * @since 1.1 377 */ 378 public static final byte END_PUNCTUATION = 22; 379 380 /** 381 * General category "Pc" in the Unicode specification. 382 * @since 1.1 383 */ 384 public static final byte CONNECTOR_PUNCTUATION = 23; 385 386 /** 387 * General category "Po" in the Unicode specification. 388 * @since 1.1 389 */ 390 public static final byte OTHER_PUNCTUATION = 24; 391 392 /** 393 * General category "Sm" in the Unicode specification. 
394 * @since 1.1 395 */ 396 public static final byte MATH_SYMBOL = 25; 397 398 /** 399 * General category "Sc" in the Unicode specification. 400 * @since 1.1 401 */ 402 public static final byte CURRENCY_SYMBOL = 26; 403 404 /** 405 * General category "Sk" in the Unicode specification. 406 * @since 1.1 407 */ 408 public static final byte MODIFIER_SYMBOL = 27; 409 410 /** 411 * General category "So" in the Unicode specification. 412 * @since 1.1 413 */ 414 public static final byte OTHER_SYMBOL = 28; 415 416 /** 417 * General category "Pi" in the Unicode specification. 418 * @since 1.4 419 */ 420 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 421 422 /** 423 * General category "Pf" in the Unicode specification. 424 * @since 1.4 425 */ 426 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 427 428 /** 429 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 430 */ 431 static final int ERROR = 0xFFFFFFFF; 432 433 434 /** 435 * Undefined bidirectional character type. Undefined {@code char} 436 * values have undefined directionality in the Unicode specification. 437 * @since 1.4 438 */ 439 public static final byte DIRECTIONALITY_UNDEFINED = -1; 440 441 /** 442 * Strong bidirectional character type "L" in the Unicode specification. 443 * @since 1.4 444 */ 445 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 446 447 /** 448 * Strong bidirectional character type "R" in the Unicode specification. 449 * @since 1.4 450 */ 451 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 452 453 /** 454 * Strong bidirectional character type "AL" in the Unicode specification. 455 * @since 1.4 456 */ 457 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 458 459 /** 460 * Weak bidirectional character type "EN" in the Unicode specification. 461 * @since 1.4 462 */ 463 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 464 465 /** 466 * Weak bidirectional character type "ES" in the Unicode specification. 
467 * @since 1.4 468 */ 469 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 470 471 /** 472 * Weak bidirectional character type "ET" in the Unicode specification. 473 * @since 1.4 474 */ 475 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 476 477 /** 478 * Weak bidirectional character type "AN" in the Unicode specification. 479 * @since 1.4 480 */ 481 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 482 483 /** 484 * Weak bidirectional character type "CS" in the Unicode specification. 485 * @since 1.4 486 */ 487 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 488 489 /** 490 * Weak bidirectional character type "NSM" in the Unicode specification. 491 * @since 1.4 492 */ 493 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 494 495 /** 496 * Weak bidirectional character type "BN" in the Unicode specification. 497 * @since 1.4 498 */ 499 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 500 501 /** 502 * Neutral bidirectional character type "B" in the Unicode specification. 503 * @since 1.4 504 */ 505 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 506 507 /** 508 * Neutral bidirectional character type "S" in the Unicode specification. 509 * @since 1.4 510 */ 511 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 512 513 /** 514 * Neutral bidirectional character type "WS" in the Unicode specification. 515 * @since 1.4 516 */ 517 public static final byte DIRECTIONALITY_WHITESPACE = 12; 518 519 /** 520 * Neutral bidirectional character type "ON" in the Unicode specification. 521 * @since 1.4 522 */ 523 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 524 525 /** 526 * Strong bidirectional character type "LRE" in the Unicode specification. 527 * @since 1.4 528 */ 529 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 530 531 /** 532 * Strong bidirectional character type "LRO" in the Unicode specification. 
533 * @since 1.4 534 */ 535 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 536 537 /** 538 * Strong bidirectional character type "RLE" in the Unicode specification. 539 * @since 1.4 540 */ 541 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 542 543 /** 544 * Strong bidirectional character type "RLO" in the Unicode specification. 545 * @since 1.4 546 */ 547 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 548 549 /** 550 * Weak bidirectional character type "PDF" in the Unicode specification. 551 * @since 1.4 552 */ 553 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 554 555 /** 556 * Weak bidirectional character type "LRI" in the Unicode specification. 557 * @since 9 558 */ 559 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19; 560 561 /** 562 * Weak bidirectional character type "RLI" in the Unicode specification. 563 * @since 9 564 */ 565 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20; 566 567 /** 568 * Weak bidirectional character type "FSI" in the Unicode specification. 569 * @since 9 570 */ 571 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21; 572 573 /** 574 * Weak bidirectional character type "PDI" in the Unicode specification. 575 * @since 9 576 */ 577 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22; 578 579 /** 580 * The minimum value of a 581 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 582 * Unicode high-surrogate code unit</a> 583 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 584 * A high-surrogate is also known as a <i>leading-surrogate</i>. 585 * 586 * @since 1.5 587 */ 588 public static final char MIN_HIGH_SURROGATE = '\uD800'; 589 590 /** 591 * The maximum value of a 592 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 593 * Unicode high-surrogate code unit</a> 594 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 
595 * A high-surrogate is also known as a <i>leading-surrogate</i>. 596 * 597 * @since 1.5 598 */ 599 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 600 601 /** 602 * The minimum value of a 603 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 604 * Unicode low-surrogate code unit</a> 605 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 606 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 607 * 608 * @since 1.5 609 */ 610 public static final char MIN_LOW_SURROGATE = '\uDC00'; 611 612 /** 613 * The maximum value of a 614 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 615 * Unicode low-surrogate code unit</a> 616 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 617 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 618 * 619 * @since 1.5 620 */ 621 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 622 623 /** 624 * The minimum value of a Unicode surrogate code unit in the 625 * UTF-16 encoding, constant {@code '\u005CuD800'}. 626 * 627 * @since 1.5 628 */ 629 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 630 631 /** 632 * The maximum value of a Unicode surrogate code unit in the 633 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 634 * 635 * @since 1.5 636 */ 637 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 638 639 /** 640 * The minimum value of a 641 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 642 * Unicode supplementary code point</a>, constant {@code U+10000}. 643 * 644 * @since 1.5 645 */ 646 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 647 648 /** 649 * The minimum value of a 650 * <a href="http://www.unicode.org/glossary/#code_point"> 651 * Unicode code point</a>, constant {@code U+0000}. 
652 * 653 * @since 1.5 654 */ 655 public static final int MIN_CODE_POINT = 0x000000; 656 657 /** 658 * The maximum value of a 659 * <a href="http://www.unicode.org/glossary/#code_point"> 660 * Unicode code point</a>, constant {@code U+10FFFF}. 661 * 662 * @since 1.5 663 */ 664 public static final int MAX_CODE_POINT = 0X10FFFF; 665 666 /** 667 * Returns an {@link Optional} containing the nominal descriptor for this 668 * instance. 669 * 670 * @return an {@link Optional} describing the {@linkplain Character} instance 671 * @since 15 672 */ 673 @Override 674 public Optional<DynamicConstantDesc<Character>> describeConstable() { 675 return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value)); 676 } 677 678 /** 679 * Instances of this class represent particular subsets of the Unicode 680 * character set. The only family of subsets defined in the 681 * {@code Character} class is {@link Character.UnicodeBlock}. 682 * Other portions of the Java API may define other subsets for their 683 * own purposes. 684 * 685 * @since 1.2 686 */ 687 public static class Subset { 688 689 private String name; 690 691 /** 692 * Constructs a new {@code Subset} instance. 693 * 694 * @param name The name of this subset 695 * @throws NullPointerException if name is {@code null} 696 */ 697 protected Subset(String name) { 698 if (name == null) { 699 throw new NullPointerException("name"); 700 } 701 this.name = name; 702 } 703 704 /** 705 * Compares two {@code Subset} objects for equality. 706 * This method returns {@code true} if and only if 707 * {@code this} and the argument refer to the same 708 * object; since this method is {@code final}, this 709 * guarantee holds for all subclasses. 710 */ 711 public final boolean equals(Object obj) { 712 return (this == obj); 713 } 714 715 /** 716 * Returns the standard hash code as defined by the 717 * {@link Object#hashCode} method. 
This method 718 * is {@code final} in order to ensure that the 719 * {@code equals} and {@code hashCode} methods will 720 * be consistent in all subclasses. 721 */ 722 public final int hashCode() { 723 return super.hashCode(); 724 } 725 726 /** 727 * Returns the name of this subset. 728 */ 729 public final String toString() { 730 return name; 731 } 732 } 733 734 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 735 // for the latest specification of Unicode Blocks. 736 737 /** 738 * A family of character subsets representing the character blocks in the 739 * Unicode specification. Character blocks generally define characters 740 * used for a specific script or purpose. A character is contained by 741 * at most one Unicode block. 742 * 743 * @since 1.2 744 */ 745 public static final class UnicodeBlock extends Subset { 746 /** 747 * NUM_ENTITIES should match the total number of UnicodeBlock identifier 748 * names plus their aliases. 749 * It should be adjusted whenever the Unicode Character Database 750 * is upgraded. 751 */ 752 private static final int NUM_ENTITIES = 804; 753 private static Map<String, UnicodeBlock> map = HashMap.newHashMap(NUM_ENTITIES); 754 755 /** 756 * Creates a UnicodeBlock with the given identifier name. 757 * This name must be the same as the block identifier. 758 */ 759 private UnicodeBlock(String idName) { 760 super(idName); 761 map.put(idName, this); 762 } 763 764 /** 765 * Creates a UnicodeBlock with the given identifier name and 766 * alias name. 767 */ 768 private UnicodeBlock(String idName, String alias) { 769 this(idName); 770 map.put(alias, this); 771 } 772 773 /** 774 * Creates a UnicodeBlock with the given identifier name and 775 * alias names. 776 */ 777 private UnicodeBlock(String idName, String... aliases) { 778 this(idName); 779 for (String alias : aliases) 780 map.put(alias, this); 781 } 782 783 /** 784 * Constant for the "Basic Latin" Unicode character block. 
785 * @since 1.2 786 */ 787 public static final UnicodeBlock BASIC_LATIN = 788 new UnicodeBlock("BASIC_LATIN", 789 "BASIC LATIN", 790 "BASICLATIN"); 791 792 /** 793 * Constant for the "Latin-1 Supplement" Unicode character block. 794 * @since 1.2 795 */ 796 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 797 new UnicodeBlock("LATIN_1_SUPPLEMENT", 798 "LATIN-1 SUPPLEMENT", 799 "LATIN-1SUPPLEMENT"); 800 801 /** 802 * Constant for the "Latin Extended-A" Unicode character block. 803 * @since 1.2 804 */ 805 public static final UnicodeBlock LATIN_EXTENDED_A = 806 new UnicodeBlock("LATIN_EXTENDED_A", 807 "LATIN EXTENDED-A", 808 "LATINEXTENDED-A"); 809 810 /** 811 * Constant for the "Latin Extended-B" Unicode character block. 812 * @since 1.2 813 */ 814 public static final UnicodeBlock LATIN_EXTENDED_B = 815 new UnicodeBlock("LATIN_EXTENDED_B", 816 "LATIN EXTENDED-B", 817 "LATINEXTENDED-B"); 818 819 /** 820 * Constant for the "IPA Extensions" Unicode character block. 821 * @since 1.2 822 */ 823 public static final UnicodeBlock IPA_EXTENSIONS = 824 new UnicodeBlock("IPA_EXTENSIONS", 825 "IPA EXTENSIONS", 826 "IPAEXTENSIONS"); 827 828 /** 829 * Constant for the "Spacing Modifier Letters" Unicode character block. 830 * @since 1.2 831 */ 832 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 833 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 834 "SPACING MODIFIER LETTERS", 835 "SPACINGMODIFIERLETTERS"); 836 837 /** 838 * Constant for the "Combining Diacritical Marks" Unicode character block. 839 * @since 1.2 840 */ 841 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 842 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 843 "COMBINING DIACRITICAL MARKS", 844 "COMBININGDIACRITICALMARKS"); 845 846 /** 847 * Constant for the "Greek and Coptic" Unicode character block. 848 * <p> 849 * This block was previously known as the "Greek" block. 
850 * 851 * @since 1.2 852 */ 853 public static final UnicodeBlock GREEK = 854 new UnicodeBlock("GREEK", 855 "GREEK AND COPTIC", 856 "GREEKANDCOPTIC"); 857 858 /** 859 * Constant for the "Cyrillic" Unicode character block. 860 * @since 1.2 861 */ 862 public static final UnicodeBlock CYRILLIC = 863 new UnicodeBlock("CYRILLIC"); 864 865 /** 866 * Constant for the "Armenian" Unicode character block. 867 * @since 1.2 868 */ 869 public static final UnicodeBlock ARMENIAN = 870 new UnicodeBlock("ARMENIAN"); 871 872 /** 873 * Constant for the "Hebrew" Unicode character block. 874 * @since 1.2 875 */ 876 public static final UnicodeBlock HEBREW = 877 new UnicodeBlock("HEBREW"); 878 879 /** 880 * Constant for the "Arabic" Unicode character block. 881 * @since 1.2 882 */ 883 public static final UnicodeBlock ARABIC = 884 new UnicodeBlock("ARABIC"); 885 886 /** 887 * Constant for the "Devanagari" Unicode character block. 888 * @since 1.2 889 */ 890 public static final UnicodeBlock DEVANAGARI = 891 new UnicodeBlock("DEVANAGARI"); 892 893 /** 894 * Constant for the "Bengali" Unicode character block. 895 * @since 1.2 896 */ 897 public static final UnicodeBlock BENGALI = 898 new UnicodeBlock("BENGALI"); 899 900 /** 901 * Constant for the "Gurmukhi" Unicode character block. 902 * @since 1.2 903 */ 904 public static final UnicodeBlock GURMUKHI = 905 new UnicodeBlock("GURMUKHI"); 906 907 /** 908 * Constant for the "Gujarati" Unicode character block. 909 * @since 1.2 910 */ 911 public static final UnicodeBlock GUJARATI = 912 new UnicodeBlock("GUJARATI"); 913 914 /** 915 * Constant for the "Oriya" Unicode character block. 916 * @since 1.2 917 */ 918 public static final UnicodeBlock ORIYA = 919 new UnicodeBlock("ORIYA"); 920 921 /** 922 * Constant for the "Tamil" Unicode character block. 923 * @since 1.2 924 */ 925 public static final UnicodeBlock TAMIL = 926 new UnicodeBlock("TAMIL"); 927 928 /** 929 * Constant for the "Telugu" Unicode character block. 
930 * @since 1.2 931 */ 932 public static final UnicodeBlock TELUGU = 933 new UnicodeBlock("TELUGU"); 934 935 /** 936 * Constant for the "Kannada" Unicode character block. 937 * @since 1.2 938 */ 939 public static final UnicodeBlock KANNADA = 940 new UnicodeBlock("KANNADA"); 941 942 /** 943 * Constant for the "Malayalam" Unicode character block. 944 * @since 1.2 945 */ 946 public static final UnicodeBlock MALAYALAM = 947 new UnicodeBlock("MALAYALAM"); 948 949 /** 950 * Constant for the "Thai" Unicode character block. 951 * @since 1.2 952 */ 953 public static final UnicodeBlock THAI = 954 new UnicodeBlock("THAI"); 955 956 /** 957 * Constant for the "Lao" Unicode character block. 958 * @since 1.2 959 */ 960 public static final UnicodeBlock LAO = 961 new UnicodeBlock("LAO"); 962 963 /** 964 * Constant for the "Tibetan" Unicode character block. 965 * @since 1.2 966 */ 967 public static final UnicodeBlock TIBETAN = 968 new UnicodeBlock("TIBETAN"); 969 970 /** 971 * Constant for the "Georgian" Unicode character block. 972 * @since 1.2 973 */ 974 public static final UnicodeBlock GEORGIAN = 975 new UnicodeBlock("GEORGIAN"); 976 977 /** 978 * Constant for the "Hangul Jamo" Unicode character block. 979 * @since 1.2 980 */ 981 public static final UnicodeBlock HANGUL_JAMO = 982 new UnicodeBlock("HANGUL_JAMO", 983 "HANGUL JAMO", 984 "HANGULJAMO"); 985 986 /** 987 * Constant for the "Latin Extended Additional" Unicode character block. 988 * @since 1.2 989 */ 990 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 991 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 992 "LATIN EXTENDED ADDITIONAL", 993 "LATINEXTENDEDADDITIONAL"); 994 995 /** 996 * Constant for the "Greek Extended" Unicode character block. 997 * @since 1.2 998 */ 999 public static final UnicodeBlock GREEK_EXTENDED = 1000 new UnicodeBlock("GREEK_EXTENDED", 1001 "GREEK EXTENDED", 1002 "GREEKEXTENDED"); 1003 1004 /** 1005 * Constant for the "General Punctuation" Unicode character block. 
1006 * @since 1.2 1007 */ 1008 public static final UnicodeBlock GENERAL_PUNCTUATION = 1009 new UnicodeBlock("GENERAL_PUNCTUATION", 1010 "GENERAL PUNCTUATION", 1011 "GENERALPUNCTUATION"); 1012 1013 /** 1014 * Constant for the "Superscripts and Subscripts" Unicode character 1015 * block. 1016 * @since 1.2 1017 */ 1018 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 1019 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 1020 "SUPERSCRIPTS AND SUBSCRIPTS", 1021 "SUPERSCRIPTSANDSUBSCRIPTS"); 1022 1023 /** 1024 * Constant for the "Currency Symbols" Unicode character block. 1025 * @since 1.2 1026 */ 1027 public static final UnicodeBlock CURRENCY_SYMBOLS = 1028 new UnicodeBlock("CURRENCY_SYMBOLS", 1029 "CURRENCY SYMBOLS", 1030 "CURRENCYSYMBOLS"); 1031 1032 /** 1033 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 1034 * character block. 1035 * <p> 1036 * This block was previously known as "Combining Marks for Symbols". 1037 * @since 1.2 1038 */ 1039 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 1040 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 1041 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 1042 "COMBININGDIACRITICALMARKSFORSYMBOLS", 1043 "COMBINING MARKS FOR SYMBOLS", 1044 "COMBININGMARKSFORSYMBOLS"); 1045 1046 /** 1047 * Constant for the "Letterlike Symbols" Unicode character block. 1048 * @since 1.2 1049 */ 1050 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 1051 new UnicodeBlock("LETTERLIKE_SYMBOLS", 1052 "LETTERLIKE SYMBOLS", 1053 "LETTERLIKESYMBOLS"); 1054 1055 /** 1056 * Constant for the "Number Forms" Unicode character block. 1057 * @since 1.2 1058 */ 1059 public static final UnicodeBlock NUMBER_FORMS = 1060 new UnicodeBlock("NUMBER_FORMS", 1061 "NUMBER FORMS", 1062 "NUMBERFORMS"); 1063 1064 /** 1065 * Constant for the "Arrows" Unicode character block. 
1066 * @since 1.2 1067 */ 1068 public static final UnicodeBlock ARROWS = 1069 new UnicodeBlock("ARROWS"); 1070 1071 /** 1072 * Constant for the "Mathematical Operators" Unicode character block. 1073 * @since 1.2 1074 */ 1075 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 1076 new UnicodeBlock("MATHEMATICAL_OPERATORS", 1077 "MATHEMATICAL OPERATORS", 1078 "MATHEMATICALOPERATORS"); 1079 1080 /** 1081 * Constant for the "Miscellaneous Technical" Unicode character block. 1082 * @since 1.2 1083 */ 1084 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 1085 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 1086 "MISCELLANEOUS TECHNICAL", 1087 "MISCELLANEOUSTECHNICAL"); 1088 1089 /** 1090 * Constant for the "Control Pictures" Unicode character block. 1091 * @since 1.2 1092 */ 1093 public static final UnicodeBlock CONTROL_PICTURES = 1094 new UnicodeBlock("CONTROL_PICTURES", 1095 "CONTROL PICTURES", 1096 "CONTROLPICTURES"); 1097 1098 /** 1099 * Constant for the "Optical Character Recognition" Unicode character block. 1100 * @since 1.2 1101 */ 1102 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1103 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1104 "OPTICAL CHARACTER RECOGNITION", 1105 "OPTICALCHARACTERRECOGNITION"); 1106 1107 /** 1108 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1109 * @since 1.2 1110 */ 1111 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1112 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1113 "ENCLOSED ALPHANUMERICS", 1114 "ENCLOSEDALPHANUMERICS"); 1115 1116 /** 1117 * Constant for the "Box Drawing" Unicode character block. 1118 * @since 1.2 1119 */ 1120 public static final UnicodeBlock BOX_DRAWING = 1121 new UnicodeBlock("BOX_DRAWING", 1122 "BOX DRAWING", 1123 "BOXDRAWING"); 1124 1125 /** 1126 * Constant for the "Block Elements" Unicode character block. 
1127 * @since 1.2 1128 */ 1129 public static final UnicodeBlock BLOCK_ELEMENTS = 1130 new UnicodeBlock("BLOCK_ELEMENTS", 1131 "BLOCK ELEMENTS", 1132 "BLOCKELEMENTS"); 1133 1134 /** 1135 * Constant for the "Geometric Shapes" Unicode character block. 1136 * @since 1.2 1137 */ 1138 public static final UnicodeBlock GEOMETRIC_SHAPES = 1139 new UnicodeBlock("GEOMETRIC_SHAPES", 1140 "GEOMETRIC SHAPES", 1141 "GEOMETRICSHAPES"); 1142 1143 /** 1144 * Constant for the "Miscellaneous Symbols" Unicode character block. 1145 * @since 1.2 1146 */ 1147 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1148 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1149 "MISCELLANEOUS SYMBOLS", 1150 "MISCELLANEOUSSYMBOLS"); 1151 1152 /** 1153 * Constant for the "Dingbats" Unicode character block. 1154 * @since 1.2 1155 */ 1156 public static final UnicodeBlock DINGBATS = 1157 new UnicodeBlock("DINGBATS"); 1158 1159 /** 1160 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 1161 * @since 1.2 1162 */ 1163 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1164 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1165 "CJK SYMBOLS AND PUNCTUATION", 1166 "CJKSYMBOLSANDPUNCTUATION"); 1167 1168 /** 1169 * Constant for the "Hiragana" Unicode character block. 1170 * @since 1.2 1171 */ 1172 public static final UnicodeBlock HIRAGANA = 1173 new UnicodeBlock("HIRAGANA"); 1174 1175 /** 1176 * Constant for the "Katakana" Unicode character block. 1177 * @since 1.2 1178 */ 1179 public static final UnicodeBlock KATAKANA = 1180 new UnicodeBlock("KATAKANA"); 1181 1182 /** 1183 * Constant for the "Bopomofo" Unicode character block. 1184 * @since 1.2 1185 */ 1186 public static final UnicodeBlock BOPOMOFO = 1187 new UnicodeBlock("BOPOMOFO"); 1188 1189 /** 1190 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 
1191 * @since 1.2 1192 */ 1193 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1194 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1195 "HANGUL COMPATIBILITY JAMO", 1196 "HANGULCOMPATIBILITYJAMO"); 1197 1198 /** 1199 * Constant for the "Kanbun" Unicode character block. 1200 * @since 1.2 1201 */ 1202 public static final UnicodeBlock KANBUN = 1203 new UnicodeBlock("KANBUN"); 1204 1205 /** 1206 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 1207 * @since 1.2 1208 */ 1209 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1210 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1211 "ENCLOSED CJK LETTERS AND MONTHS", 1212 "ENCLOSEDCJKLETTERSANDMONTHS"); 1213 1214 /** 1215 * Constant for the "CJK Compatibility" Unicode character block. 1216 * @since 1.2 1217 */ 1218 public static final UnicodeBlock CJK_COMPATIBILITY = 1219 new UnicodeBlock("CJK_COMPATIBILITY", 1220 "CJK COMPATIBILITY", 1221 "CJKCOMPATIBILITY"); 1222 1223 /** 1224 * Constant for the "CJK Unified Ideographs" Unicode character block. 1225 * @since 1.2 1226 */ 1227 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1228 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1229 "CJK UNIFIED IDEOGRAPHS", 1230 "CJKUNIFIEDIDEOGRAPHS"); 1231 1232 /** 1233 * Constant for the "Hangul Syllables" Unicode character block. 1234 * @since 1.2 1235 */ 1236 public static final UnicodeBlock HANGUL_SYLLABLES = 1237 new UnicodeBlock("HANGUL_SYLLABLES", 1238 "HANGUL SYLLABLES", 1239 "HANGULSYLLABLES"); 1240 1241 /** 1242 * Constant for the "Private Use Area" Unicode character block. 1243 * @since 1.2 1244 */ 1245 public static final UnicodeBlock PRIVATE_USE_AREA = 1246 new UnicodeBlock("PRIVATE_USE_AREA", 1247 "PRIVATE USE AREA", 1248 "PRIVATEUSEAREA"); 1249 1250 /** 1251 * Constant for the "CJK Compatibility Ideographs" Unicode character 1252 * block. 
1253 * @since 1.2 1254 */ 1255 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1256 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1257 "CJK COMPATIBILITY IDEOGRAPHS", 1258 "CJKCOMPATIBILITYIDEOGRAPHS"); 1259 1260 /** 1261 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 1262 * @since 1.2 1263 */ 1264 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1265 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1266 "ALPHABETIC PRESENTATION FORMS", 1267 "ALPHABETICPRESENTATIONFORMS"); 1268 1269 /** 1270 * Constant for the "Arabic Presentation Forms-A" Unicode character 1271 * block. 1272 * @since 1.2 1273 */ 1274 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1275 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1276 "ARABIC PRESENTATION FORMS-A", 1277 "ARABICPRESENTATIONFORMS-A"); 1278 1279 /** 1280 * Constant for the "Combining Half Marks" Unicode character block. 1281 * @since 1.2 1282 */ 1283 public static final UnicodeBlock COMBINING_HALF_MARKS = 1284 new UnicodeBlock("COMBINING_HALF_MARKS", 1285 "COMBINING HALF MARKS", 1286 "COMBININGHALFMARKS"); 1287 1288 /** 1289 * Constant for the "CJK Compatibility Forms" Unicode character block. 1290 * @since 1.2 1291 */ 1292 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1293 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1294 "CJK COMPATIBILITY FORMS", 1295 "CJKCOMPATIBILITYFORMS"); 1296 1297 /** 1298 * Constant for the "Small Form Variants" Unicode character block. 1299 * @since 1.2 1300 */ 1301 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1302 new UnicodeBlock("SMALL_FORM_VARIANTS", 1303 "SMALL FORM VARIANTS", 1304 "SMALLFORMVARIANTS"); 1305 1306 /** 1307 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 
1308 * @since 1.2 1309 */ 1310 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1311 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1312 "ARABIC PRESENTATION FORMS-B", 1313 "ARABICPRESENTATIONFORMS-B"); 1314 1315 /** 1316 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1317 * block. 1318 * @since 1.2 1319 */ 1320 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1321 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1322 "HALFWIDTH AND FULLWIDTH FORMS", 1323 "HALFWIDTHANDFULLWIDTHFORMS"); 1324 1325 /** 1326 * Constant for the "Specials" Unicode character block. 1327 * @since 1.2 1328 */ 1329 public static final UnicodeBlock SPECIALS = 1330 new UnicodeBlock("SPECIALS"); 1331 1332 /** 1333 * @deprecated 1334 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES}, 1335 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}. 1336 * These constants match the block definitions of the Unicode Standard. 1337 * The {@link #of(char)} and {@link #of(int)} methods return the 1338 * standard constants. 1339 */ 1340 @Deprecated(since="1.5") 1341 public static final UnicodeBlock SURROGATES_AREA = 1342 new UnicodeBlock("SURROGATES_AREA"); 1343 1344 /** 1345 * Constant for the "Syriac" Unicode character block. 1346 * @since 1.4 1347 */ 1348 public static final UnicodeBlock SYRIAC = 1349 new UnicodeBlock("SYRIAC"); 1350 1351 /** 1352 * Constant for the "Thaana" Unicode character block. 1353 * @since 1.4 1354 */ 1355 public static final UnicodeBlock THAANA = 1356 new UnicodeBlock("THAANA"); 1357 1358 /** 1359 * Constant for the "Sinhala" Unicode character block. 1360 * @since 1.4 1361 */ 1362 public static final UnicodeBlock SINHALA = 1363 new UnicodeBlock("SINHALA"); 1364 1365 /** 1366 * Constant for the "Myanmar" Unicode character block. 
1367 * @since 1.4 1368 */ 1369 public static final UnicodeBlock MYANMAR = 1370 new UnicodeBlock("MYANMAR"); 1371 1372 /** 1373 * Constant for the "Ethiopic" Unicode character block. 1374 * @since 1.4 1375 */ 1376 public static final UnicodeBlock ETHIOPIC = 1377 new UnicodeBlock("ETHIOPIC"); 1378 1379 /** 1380 * Constant for the "Cherokee" Unicode character block. 1381 * @since 1.4 1382 */ 1383 public static final UnicodeBlock CHEROKEE = 1384 new UnicodeBlock("CHEROKEE"); 1385 1386 /** 1387 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1388 * @since 1.4 1389 */ 1390 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1391 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1392 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1393 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1394 1395 /** 1396 * Constant for the "Ogham" Unicode character block. 1397 * @since 1.4 1398 */ 1399 public static final UnicodeBlock OGHAM = 1400 new UnicodeBlock("OGHAM"); 1401 1402 /** 1403 * Constant for the "Runic" Unicode character block. 1404 * @since 1.4 1405 */ 1406 public static final UnicodeBlock RUNIC = 1407 new UnicodeBlock("RUNIC"); 1408 1409 /** 1410 * Constant for the "Khmer" Unicode character block. 1411 * @since 1.4 1412 */ 1413 public static final UnicodeBlock KHMER = 1414 new UnicodeBlock("KHMER"); 1415 1416 /** 1417 * Constant for the "Mongolian" Unicode character block. 1418 * @since 1.4 1419 */ 1420 public static final UnicodeBlock MONGOLIAN = 1421 new UnicodeBlock("MONGOLIAN"); 1422 1423 /** 1424 * Constant for the "Braille Patterns" Unicode character block. 1425 * @since 1.4 1426 */ 1427 public static final UnicodeBlock BRAILLE_PATTERNS = 1428 new UnicodeBlock("BRAILLE_PATTERNS", 1429 "BRAILLE PATTERNS", 1430 "BRAILLEPATTERNS"); 1431 1432 /** 1433 * Constant for the "CJK Radicals Supplement" Unicode character block. 
1434 * @since 1.4 1435 */ 1436 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1437 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1438 "CJK RADICALS SUPPLEMENT", 1439 "CJKRADICALSSUPPLEMENT"); 1440 1441 /** 1442 * Constant for the "Kangxi Radicals" Unicode character block. 1443 * @since 1.4 1444 */ 1445 public static final UnicodeBlock KANGXI_RADICALS = 1446 new UnicodeBlock("KANGXI_RADICALS", 1447 "KANGXI RADICALS", 1448 "KANGXIRADICALS"); 1449 1450 /** 1451 * Constant for the "Ideographic Description Characters" Unicode character block. 1452 * @since 1.4 1453 */ 1454 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1455 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1456 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1457 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1458 1459 /** 1460 * Constant for the "Bopomofo Extended" Unicode character block. 1461 * @since 1.4 1462 */ 1463 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1464 new UnicodeBlock("BOPOMOFO_EXTENDED", 1465 "BOPOMOFO EXTENDED", 1466 "BOPOMOFOEXTENDED"); 1467 1468 /** 1469 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 1470 * @since 1.4 1471 */ 1472 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1473 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1474 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1475 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1476 1477 /** 1478 * Constant for the "Yi Syllables" Unicode character block. 1479 * @since 1.4 1480 */ 1481 public static final UnicodeBlock YI_SYLLABLES = 1482 new UnicodeBlock("YI_SYLLABLES", 1483 "YI SYLLABLES", 1484 "YISYLLABLES"); 1485 1486 /** 1487 * Constant for the "Yi Radicals" Unicode character block. 1488 * @since 1.4 1489 */ 1490 public static final UnicodeBlock YI_RADICALS = 1491 new UnicodeBlock("YI_RADICALS", 1492 "YI RADICALS", 1493 "YIRADICALS"); 1494 1495 /** 1496 * Constant for the "Cyrillic Supplement" Unicode character block. 
1497 * This block was previously known as the "Cyrillic Supplementary" block. 1498 * @since 1.5 1499 */ 1500 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1501 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1502 "CYRILLIC SUPPLEMENTARY", 1503 "CYRILLICSUPPLEMENTARY", 1504 "CYRILLIC SUPPLEMENT", 1505 "CYRILLICSUPPLEMENT"); 1506 1507 /** 1508 * Constant for the "Tagalog" Unicode character block. 1509 * @since 1.5 1510 */ 1511 public static final UnicodeBlock TAGALOG = 1512 new UnicodeBlock("TAGALOG"); 1513 1514 /** 1515 * Constant for the "Hanunoo" Unicode character block. 1516 * @since 1.5 1517 */ 1518 public static final UnicodeBlock HANUNOO = 1519 new UnicodeBlock("HANUNOO"); 1520 1521 /** 1522 * Constant for the "Buhid" Unicode character block. 1523 * @since 1.5 1524 */ 1525 public static final UnicodeBlock BUHID = 1526 new UnicodeBlock("BUHID"); 1527 1528 /** 1529 * Constant for the "Tagbanwa" Unicode character block. 1530 * @since 1.5 1531 */ 1532 public static final UnicodeBlock TAGBANWA = 1533 new UnicodeBlock("TAGBANWA"); 1534 1535 /** 1536 * Constant for the "Limbu" Unicode character block. 1537 * @since 1.5 1538 */ 1539 public static final UnicodeBlock LIMBU = 1540 new UnicodeBlock("LIMBU"); 1541 1542 /** 1543 * Constant for the "Tai Le" Unicode character block. 1544 * @since 1.5 1545 */ 1546 public static final UnicodeBlock TAI_LE = 1547 new UnicodeBlock("TAI_LE", 1548 "TAI LE", 1549 "TAILE"); 1550 1551 /** 1552 * Constant for the "Khmer Symbols" Unicode character block. 1553 * @since 1.5 1554 */ 1555 public static final UnicodeBlock KHMER_SYMBOLS = 1556 new UnicodeBlock("KHMER_SYMBOLS", 1557 "KHMER SYMBOLS", 1558 "KHMERSYMBOLS"); 1559 1560 /** 1561 * Constant for the "Phonetic Extensions" Unicode character block. 
1562 * @since 1.5 1563 */ 1564 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1565 new UnicodeBlock("PHONETIC_EXTENSIONS", 1566 "PHONETIC EXTENSIONS", 1567 "PHONETICEXTENSIONS"); 1568 1569 /** 1570 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 1571 * @since 1.5 1572 */ 1573 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1574 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1575 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1576 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1577 1578 /** 1579 * Constant for the "Supplemental Arrows-A" Unicode character block. 1580 * @since 1.5 1581 */ 1582 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1583 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1584 "SUPPLEMENTAL ARROWS-A", 1585 "SUPPLEMENTALARROWS-A"); 1586 1587 /** 1588 * Constant for the "Supplemental Arrows-B" Unicode character block. 1589 * @since 1.5 1590 */ 1591 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1592 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1593 "SUPPLEMENTAL ARROWS-B", 1594 "SUPPLEMENTALARROWS-B"); 1595 1596 /** 1597 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1598 * character block. 1599 * @since 1.5 1600 */ 1601 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1602 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1603 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1604 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1605 1606 /** 1607 * Constant for the "Supplemental Mathematical Operators" Unicode 1608 * character block. 1609 * @since 1.5 1610 */ 1611 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1612 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1613 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1614 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1615 1616 /** 1617 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1618 * block. 
1619 * @since 1.5 1620 */ 1621 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1622 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1623 "MISCELLANEOUS SYMBOLS AND ARROWS", 1624 "MISCELLANEOUSSYMBOLSANDARROWS"); 1625 1626 /** 1627 * Constant for the "Katakana Phonetic Extensions" Unicode character 1628 * block. 1629 * @since 1.5 1630 */ 1631 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1632 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1633 "KATAKANA PHONETIC EXTENSIONS", 1634 "KATAKANAPHONETICEXTENSIONS"); 1635 1636 /** 1637 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1638 * @since 1.5 1639 */ 1640 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1641 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1642 "YIJING HEXAGRAM SYMBOLS", 1643 "YIJINGHEXAGRAMSYMBOLS"); 1644 1645 /** 1646 * Constant for the "Variation Selectors" Unicode character block. 1647 * @since 1.5 1648 */ 1649 public static final UnicodeBlock VARIATION_SELECTORS = 1650 new UnicodeBlock("VARIATION_SELECTORS", 1651 "VARIATION SELECTORS", 1652 "VARIATIONSELECTORS"); 1653 1654 /** 1655 * Constant for the "Linear B Syllabary" Unicode character block. 1656 * @since 1.5 1657 */ 1658 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1659 new UnicodeBlock("LINEAR_B_SYLLABARY", 1660 "LINEAR B SYLLABARY", 1661 "LINEARBSYLLABARY"); 1662 1663 /** 1664 * Constant for the "Linear B Ideograms" Unicode character block. 1665 * @since 1.5 1666 */ 1667 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1668 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1669 "LINEAR B IDEOGRAMS", 1670 "LINEARBIDEOGRAMS"); 1671 1672 /** 1673 * Constant for the "Aegean Numbers" Unicode character block. 1674 * @since 1.5 1675 */ 1676 public static final UnicodeBlock AEGEAN_NUMBERS = 1677 new UnicodeBlock("AEGEAN_NUMBERS", 1678 "AEGEAN NUMBERS", 1679 "AEGEANNUMBERS"); 1680 1681 /** 1682 * Constant for the "Old Italic" Unicode character block. 
1683 * @since 1.5 1684 */ 1685 public static final UnicodeBlock OLD_ITALIC = 1686 new UnicodeBlock("OLD_ITALIC", 1687 "OLD ITALIC", 1688 "OLDITALIC"); 1689 1690 /** 1691 * Constant for the "Gothic" Unicode character block. 1692 * @since 1.5 1693 */ 1694 public static final UnicodeBlock GOTHIC = 1695 new UnicodeBlock("GOTHIC"); 1696 1697 /** 1698 * Constant for the "Ugaritic" Unicode character block. 1699 * @since 1.5 1700 */ 1701 public static final UnicodeBlock UGARITIC = 1702 new UnicodeBlock("UGARITIC"); 1703 1704 /** 1705 * Constant for the "Deseret" Unicode character block. 1706 * @since 1.5 1707 */ 1708 public static final UnicodeBlock DESERET = 1709 new UnicodeBlock("DESERET"); 1710 1711 /** 1712 * Constant for the "Shavian" Unicode character block. 1713 * @since 1.5 1714 */ 1715 public static final UnicodeBlock SHAVIAN = 1716 new UnicodeBlock("SHAVIAN"); 1717 1718 /** 1719 * Constant for the "Osmanya" Unicode character block. 1720 * @since 1.5 1721 */ 1722 public static final UnicodeBlock OSMANYA = 1723 new UnicodeBlock("OSMANYA"); 1724 1725 /** 1726 * Constant for the "Cypriot Syllabary" Unicode character block. 1727 * @since 1.5 1728 */ 1729 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1730 new UnicodeBlock("CYPRIOT_SYLLABARY", 1731 "CYPRIOT SYLLABARY", 1732 "CYPRIOTSYLLABARY"); 1733 1734 /** 1735 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1736 * @since 1.5 1737 */ 1738 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1739 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1740 "BYZANTINE MUSICAL SYMBOLS", 1741 "BYZANTINEMUSICALSYMBOLS"); 1742 1743 /** 1744 * Constant for the "Musical Symbols" Unicode character block. 1745 * @since 1.5 1746 */ 1747 public static final UnicodeBlock MUSICAL_SYMBOLS = 1748 new UnicodeBlock("MUSICAL_SYMBOLS", 1749 "MUSICAL SYMBOLS", 1750 "MUSICALSYMBOLS"); 1751 1752 /** 1753 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 
1754 * @since 1.5 1755 */ 1756 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1757 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1758 "TAI XUAN JING SYMBOLS", 1759 "TAIXUANJINGSYMBOLS"); 1760 1761 /** 1762 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1763 * character block. 1764 * @since 1.5 1765 */ 1766 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1767 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1768 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1769 "MATHEMATICALALPHANUMERICSYMBOLS"); 1770 1771 /** 1772 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1773 * character block. 1774 * @since 1.5 1775 */ 1776 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1777 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1778 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1779 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1780 1781 /** 1782 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 1783 * @since 1.5 1784 */ 1785 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1786 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1787 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1788 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1789 1790 /** 1791 * Constant for the "Tags" Unicode character block. 1792 * @since 1.5 1793 */ 1794 public static final UnicodeBlock TAGS = 1795 new UnicodeBlock("TAGS"); 1796 1797 /** 1798 * Constant for the "Variation Selectors Supplement" Unicode character 1799 * block. 1800 * @since 1.5 1801 */ 1802 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1803 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1804 "VARIATION SELECTORS SUPPLEMENT", 1805 "VARIATIONSELECTORSSUPPLEMENT"); 1806 1807 /** 1808 * Constant for the "Supplementary Private Use Area-A" Unicode character 1809 * block. 
1810 * @since 1.5 1811 */ 1812 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1813 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1814 "SUPPLEMENTARY PRIVATE USE AREA-A", 1815 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1816 1817 /** 1818 * Constant for the "Supplementary Private Use Area-B" Unicode character 1819 * block. 1820 * @since 1.5 1821 */ 1822 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1823 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1824 "SUPPLEMENTARY PRIVATE USE AREA-B", 1825 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1826 1827 /** 1828 * Constant for the "High Surrogates" Unicode character block. 1829 * This block represents codepoint values in the high surrogate 1830 * range: U+D800 through U+DB7F 1831 * 1832 * @since 1.5 1833 */ 1834 public static final UnicodeBlock HIGH_SURROGATES = 1835 new UnicodeBlock("HIGH_SURROGATES", 1836 "HIGH SURROGATES", 1837 "HIGHSURROGATES"); 1838 1839 /** 1840 * Constant for the "High Private Use Surrogates" Unicode character 1841 * block. 1842 * This block represents codepoint values in the private use high 1843 * surrogate range: U+DB80 through U+DBFF 1844 * 1845 * @since 1.5 1846 */ 1847 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1848 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1849 "HIGH PRIVATE USE SURROGATES", 1850 "HIGHPRIVATEUSESURROGATES"); 1851 1852 /** 1853 * Constant for the "Low Surrogates" Unicode character block. 1854 * This block represents codepoint values in the low surrogate 1855 * range: U+DC00 through U+DFFF 1856 * 1857 * @since 1.5 1858 */ 1859 public static final UnicodeBlock LOW_SURROGATES = 1860 new UnicodeBlock("LOW_SURROGATES", 1861 "LOW SURROGATES", 1862 "LOWSURROGATES"); 1863 1864 /** 1865 * Constant for the "Arabic Supplement" Unicode character block. 
1866 * @since 1.7 1867 */ 1868 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1869 new UnicodeBlock("ARABIC_SUPPLEMENT", 1870 "ARABIC SUPPLEMENT", 1871 "ARABICSUPPLEMENT"); 1872 1873 /** 1874 * Constant for the "NKo" Unicode character block. 1875 * @since 1.7 1876 */ 1877 public static final UnicodeBlock NKO = 1878 new UnicodeBlock("NKO"); 1879 1880 /** 1881 * Constant for the "Samaritan" Unicode character block. 1882 * @since 1.7 1883 */ 1884 public static final UnicodeBlock SAMARITAN = 1885 new UnicodeBlock("SAMARITAN"); 1886 1887 /** 1888 * Constant for the "Mandaic" Unicode character block. 1889 * @since 1.7 1890 */ 1891 public static final UnicodeBlock MANDAIC = 1892 new UnicodeBlock("MANDAIC"); 1893 1894 /** 1895 * Constant for the "Ethiopic Supplement" Unicode character block. 1896 * @since 1.7 1897 */ 1898 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1899 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1900 "ETHIOPIC SUPPLEMENT", 1901 "ETHIOPICSUPPLEMENT"); 1902 1903 /** 1904 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1905 * Unicode character block. 1906 * @since 1.7 1907 */ 1908 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1909 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1910 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1911 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1912 1913 /** 1914 * Constant for the "New Tai Lue" Unicode character block. 1915 * @since 1.7 1916 */ 1917 public static final UnicodeBlock NEW_TAI_LUE = 1918 new UnicodeBlock("NEW_TAI_LUE", 1919 "NEW TAI LUE", 1920 "NEWTAILUE"); 1921 1922 /** 1923 * Constant for the "Buginese" Unicode character block. 1924 * @since 1.7 1925 */ 1926 public static final UnicodeBlock BUGINESE = 1927 new UnicodeBlock("BUGINESE"); 1928 1929 /** 1930 * Constant for the "Tai Tham" Unicode character block. 
1931 * @since 1.7 1932 */ 1933 public static final UnicodeBlock TAI_THAM = 1934 new UnicodeBlock("TAI_THAM", 1935 "TAI THAM", 1936 "TAITHAM"); 1937 1938 /** 1939 * Constant for the "Balinese" Unicode character block. 1940 * @since 1.7 1941 */ 1942 public static final UnicodeBlock BALINESE = 1943 new UnicodeBlock("BALINESE"); 1944 1945 /** 1946 * Constant for the "Sundanese" Unicode character block. 1947 * @since 1.7 1948 */ 1949 public static final UnicodeBlock SUNDANESE = 1950 new UnicodeBlock("SUNDANESE"); 1951 1952 /** 1953 * Constant for the "Batak" Unicode character block. 1954 * @since 1.7 1955 */ 1956 public static final UnicodeBlock BATAK = 1957 new UnicodeBlock("BATAK"); 1958 1959 /** 1960 * Constant for the "Lepcha" Unicode character block. 1961 * @since 1.7 1962 */ 1963 public static final UnicodeBlock LEPCHA = 1964 new UnicodeBlock("LEPCHA"); 1965 1966 /** 1967 * Constant for the "Ol Chiki" Unicode character block. 1968 * @since 1.7 1969 */ 1970 public static final UnicodeBlock OL_CHIKI = 1971 new UnicodeBlock("OL_CHIKI", 1972 "OL CHIKI", 1973 "OLCHIKI"); 1974 1975 /** 1976 * Constant for the "Vedic Extensions" Unicode character block. 1977 * @since 1.7 1978 */ 1979 public static final UnicodeBlock VEDIC_EXTENSIONS = 1980 new UnicodeBlock("VEDIC_EXTENSIONS", 1981 "VEDIC EXTENSIONS", 1982 "VEDICEXTENSIONS"); 1983 1984 /** 1985 * Constant for the "Phonetic Extensions Supplement" Unicode character 1986 * block. 1987 * @since 1.7 1988 */ 1989 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1990 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1991 "PHONETIC EXTENSIONS SUPPLEMENT", 1992 "PHONETICEXTENSIONSSUPPLEMENT"); 1993 1994 /** 1995 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1996 * character block. 
1997 * @since 1.7 1998 */ 1999 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 2000 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 2001 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 2002 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 2003 2004 /** 2005 * Constant for the "Glagolitic" Unicode character block. 2006 * @since 1.7 2007 */ 2008 public static final UnicodeBlock GLAGOLITIC = 2009 new UnicodeBlock("GLAGOLITIC"); 2010 2011 /** 2012 * Constant for the "Latin Extended-C" Unicode character block. 2013 * @since 1.7 2014 */ 2015 public static final UnicodeBlock LATIN_EXTENDED_C = 2016 new UnicodeBlock("LATIN_EXTENDED_C", 2017 "LATIN EXTENDED-C", 2018 "LATINEXTENDED-C"); 2019 2020 /** 2021 * Constant for the "Coptic" Unicode character block. 2022 * @since 1.7 2023 */ 2024 public static final UnicodeBlock COPTIC = 2025 new UnicodeBlock("COPTIC"); 2026 2027 /** 2028 * Constant for the "Georgian Supplement" Unicode character block. 2029 * @since 1.7 2030 */ 2031 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 2032 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 2033 "GEORGIAN SUPPLEMENT", 2034 "GEORGIANSUPPLEMENT"); 2035 2036 /** 2037 * Constant for the "Tifinagh" Unicode character block. 2038 * @since 1.7 2039 */ 2040 public static final UnicodeBlock TIFINAGH = 2041 new UnicodeBlock("TIFINAGH"); 2042 2043 /** 2044 * Constant for the "Ethiopic Extended" Unicode character block. 2045 * @since 1.7 2046 */ 2047 public static final UnicodeBlock ETHIOPIC_EXTENDED = 2048 new UnicodeBlock("ETHIOPIC_EXTENDED", 2049 "ETHIOPIC EXTENDED", 2050 "ETHIOPICEXTENDED"); 2051 2052 /** 2053 * Constant for the "Cyrillic Extended-A" Unicode character block. 2054 * @since 1.7 2055 */ 2056 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2057 new UnicodeBlock("CYRILLIC_EXTENDED_A", 2058 "CYRILLIC EXTENDED-A", 2059 "CYRILLICEXTENDED-A"); 2060 2061 /** 2062 * Constant for the "Supplemental Punctuation" Unicode character block. 
2063 * @since 1.7 2064 */ 2065 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2066 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 2067 "SUPPLEMENTAL PUNCTUATION", 2068 "SUPPLEMENTALPUNCTUATION"); 2069 2070 /** 2071 * Constant for the "CJK Strokes" Unicode character block. 2072 * @since 1.7 2073 */ 2074 public static final UnicodeBlock CJK_STROKES = 2075 new UnicodeBlock("CJK_STROKES", 2076 "CJK STROKES", 2077 "CJKSTROKES"); 2078 2079 /** 2080 * Constant for the "Lisu" Unicode character block. 2081 * @since 1.7 2082 */ 2083 public static final UnicodeBlock LISU = 2084 new UnicodeBlock("LISU"); 2085 2086 /** 2087 * Constant for the "Vai" Unicode character block. 2088 * @since 1.7 2089 */ 2090 public static final UnicodeBlock VAI = 2091 new UnicodeBlock("VAI"); 2092 2093 /** 2094 * Constant for the "Cyrillic Extended-B" Unicode character block. 2095 * @since 1.7 2096 */ 2097 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2098 new UnicodeBlock("CYRILLIC_EXTENDED_B", 2099 "CYRILLIC EXTENDED-B", 2100 "CYRILLICEXTENDED-B"); 2101 2102 /** 2103 * Constant for the "Bamum" Unicode character block. 2104 * @since 1.7 2105 */ 2106 public static final UnicodeBlock BAMUM = 2107 new UnicodeBlock("BAMUM"); 2108 2109 /** 2110 * Constant for the "Modifier Tone Letters" Unicode character block. 2111 * @since 1.7 2112 */ 2113 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2114 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2115 "MODIFIER TONE LETTERS", 2116 "MODIFIERTONELETTERS"); 2117 2118 /** 2119 * Constant for the "Latin Extended-D" Unicode character block. 2120 * @since 1.7 2121 */ 2122 public static final UnicodeBlock LATIN_EXTENDED_D = 2123 new UnicodeBlock("LATIN_EXTENDED_D", 2124 "LATIN EXTENDED-D", 2125 "LATINEXTENDED-D"); 2126 2127 /** 2128 * Constant for the "Syloti Nagri" Unicode character block. 
2129 * @since 1.7 2130 */ 2131 public static final UnicodeBlock SYLOTI_NAGRI = 2132 new UnicodeBlock("SYLOTI_NAGRI", 2133 "SYLOTI NAGRI", 2134 "SYLOTINAGRI"); 2135 2136 /** 2137 * Constant for the "Common Indic Number Forms" Unicode character block. 2138 * @since 1.7 2139 */ 2140 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2141 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2142 "COMMON INDIC NUMBER FORMS", 2143 "COMMONINDICNUMBERFORMS"); 2144 2145 /** 2146 * Constant for the "Phags-pa" Unicode character block. 2147 * @since 1.7 2148 */ 2149 public static final UnicodeBlock PHAGS_PA = 2150 new UnicodeBlock("PHAGS_PA", 2151 "PHAGS-PA"); 2152 2153 /** 2154 * Constant for the "Saurashtra" Unicode character block. 2155 * @since 1.7 2156 */ 2157 public static final UnicodeBlock SAURASHTRA = 2158 new UnicodeBlock("SAURASHTRA"); 2159 2160 /** 2161 * Constant for the "Devanagari Extended" Unicode character block. 2162 * @since 1.7 2163 */ 2164 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2165 new UnicodeBlock("DEVANAGARI_EXTENDED", 2166 "DEVANAGARI EXTENDED", 2167 "DEVANAGARIEXTENDED"); 2168 2169 /** 2170 * Constant for the "Kayah Li" Unicode character block. 2171 * @since 1.7 2172 */ 2173 public static final UnicodeBlock KAYAH_LI = 2174 new UnicodeBlock("KAYAH_LI", 2175 "KAYAH LI", 2176 "KAYAHLI"); 2177 2178 /** 2179 * Constant for the "Rejang" Unicode character block. 2180 * @since 1.7 2181 */ 2182 public static final UnicodeBlock REJANG = 2183 new UnicodeBlock("REJANG"); 2184 2185 /** 2186 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2187 * @since 1.7 2188 */ 2189 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2190 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2191 "HANGUL JAMO EXTENDED-A", 2192 "HANGULJAMOEXTENDED-A"); 2193 2194 /** 2195 * Constant for the "Javanese" Unicode character block. 
2196 * @since 1.7 2197 */ 2198 public static final UnicodeBlock JAVANESE = 2199 new UnicodeBlock("JAVANESE"); 2200 2201 /** 2202 * Constant for the "Cham" Unicode character block. 2203 * @since 1.7 2204 */ 2205 public static final UnicodeBlock CHAM = 2206 new UnicodeBlock("CHAM"); 2207 2208 /** 2209 * Constant for the "Myanmar Extended-A" Unicode character block. 2210 * @since 1.7 2211 */ 2212 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2213 new UnicodeBlock("MYANMAR_EXTENDED_A", 2214 "MYANMAR EXTENDED-A", 2215 "MYANMAREXTENDED-A"); 2216 2217 /** 2218 * Constant for the "Tai Viet" Unicode character block. 2219 * @since 1.7 2220 */ 2221 public static final UnicodeBlock TAI_VIET = 2222 new UnicodeBlock("TAI_VIET", 2223 "TAI VIET", 2224 "TAIVIET"); 2225 2226 /** 2227 * Constant for the "Ethiopic Extended-A" Unicode character block. 2228 * @since 1.7 2229 */ 2230 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2231 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2232 "ETHIOPIC EXTENDED-A", 2233 "ETHIOPICEXTENDED-A"); 2234 2235 /** 2236 * Constant for the "Meetei Mayek" Unicode character block. 2237 * @since 1.7 2238 */ 2239 public static final UnicodeBlock MEETEI_MAYEK = 2240 new UnicodeBlock("MEETEI_MAYEK", 2241 "MEETEI MAYEK", 2242 "MEETEIMAYEK"); 2243 2244 /** 2245 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2246 * @since 1.7 2247 */ 2248 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2249 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2250 "HANGUL JAMO EXTENDED-B", 2251 "HANGULJAMOEXTENDED-B"); 2252 2253 /** 2254 * Constant for the "Vertical Forms" Unicode character block. 2255 * @since 1.7 2256 */ 2257 public static final UnicodeBlock VERTICAL_FORMS = 2258 new UnicodeBlock("VERTICAL_FORMS", 2259 "VERTICAL FORMS", 2260 "VERTICALFORMS"); 2261 2262 /** 2263 * Constant for the "Ancient Greek Numbers" Unicode character block. 
2264 * @since 1.7 2265 */ 2266 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2267 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2268 "ANCIENT GREEK NUMBERS", 2269 "ANCIENTGREEKNUMBERS"); 2270 2271 /** 2272 * Constant for the "Ancient Symbols" Unicode character block. 2273 * @since 1.7 2274 */ 2275 public static final UnicodeBlock ANCIENT_SYMBOLS = 2276 new UnicodeBlock("ANCIENT_SYMBOLS", 2277 "ANCIENT SYMBOLS", 2278 "ANCIENTSYMBOLS"); 2279 2280 /** 2281 * Constant for the "Phaistos Disc" Unicode character block. 2282 * @since 1.7 2283 */ 2284 public static final UnicodeBlock PHAISTOS_DISC = 2285 new UnicodeBlock("PHAISTOS_DISC", 2286 "PHAISTOS DISC", 2287 "PHAISTOSDISC"); 2288 2289 /** 2290 * Constant for the "Lycian" Unicode character block. 2291 * @since 1.7 2292 */ 2293 public static final UnicodeBlock LYCIAN = 2294 new UnicodeBlock("LYCIAN"); 2295 2296 /** 2297 * Constant for the "Carian" Unicode character block. 2298 * @since 1.7 2299 */ 2300 public static final UnicodeBlock CARIAN = 2301 new UnicodeBlock("CARIAN"); 2302 2303 /** 2304 * Constant for the "Old Persian" Unicode character block. 2305 * @since 1.7 2306 */ 2307 public static final UnicodeBlock OLD_PERSIAN = 2308 new UnicodeBlock("OLD_PERSIAN", 2309 "OLD PERSIAN", 2310 "OLDPERSIAN"); 2311 2312 /** 2313 * Constant for the "Imperial Aramaic" Unicode character block. 2314 * @since 1.7 2315 */ 2316 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2317 new UnicodeBlock("IMPERIAL_ARAMAIC", 2318 "IMPERIAL ARAMAIC", 2319 "IMPERIALARAMAIC"); 2320 2321 /** 2322 * Constant for the "Phoenician" Unicode character block. 2323 * @since 1.7 2324 */ 2325 public static final UnicodeBlock PHOENICIAN = 2326 new UnicodeBlock("PHOENICIAN"); 2327 2328 /** 2329 * Constant for the "Lydian" Unicode character block. 2330 * @since 1.7 2331 */ 2332 public static final UnicodeBlock LYDIAN = 2333 new UnicodeBlock("LYDIAN"); 2334 2335 /** 2336 * Constant for the "Kharoshthi" Unicode character block. 
2337 * @since 1.7 2338 */ 2339 public static final UnicodeBlock KHAROSHTHI = 2340 new UnicodeBlock("KHAROSHTHI"); 2341 2342 /** 2343 * Constant for the "Old South Arabian" Unicode character block. 2344 * @since 1.7 2345 */ 2346 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2347 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2348 "OLD SOUTH ARABIAN", 2349 "OLDSOUTHARABIAN"); 2350 2351 /** 2352 * Constant for the "Avestan" Unicode character block. 2353 * @since 1.7 2354 */ 2355 public static final UnicodeBlock AVESTAN = 2356 new UnicodeBlock("AVESTAN"); 2357 2358 /** 2359 * Constant for the "Inscriptional Parthian" Unicode character block. 2360 * @since 1.7 2361 */ 2362 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2363 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2364 "INSCRIPTIONAL PARTHIAN", 2365 "INSCRIPTIONALPARTHIAN"); 2366 2367 /** 2368 * Constant for the "Inscriptional Pahlavi" Unicode character block. 2369 * @since 1.7 2370 */ 2371 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2372 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2373 "INSCRIPTIONAL PAHLAVI", 2374 "INSCRIPTIONALPAHLAVI"); 2375 2376 /** 2377 * Constant for the "Old Turkic" Unicode character block. 2378 * @since 1.7 2379 */ 2380 public static final UnicodeBlock OLD_TURKIC = 2381 new UnicodeBlock("OLD_TURKIC", 2382 "OLD TURKIC", 2383 "OLDTURKIC"); 2384 2385 /** 2386 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2387 * @since 1.7 2388 */ 2389 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2390 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2391 "RUMI NUMERAL SYMBOLS", 2392 "RUMINUMERALSYMBOLS"); 2393 2394 /** 2395 * Constant for the "Brahmi" Unicode character block. 2396 * @since 1.7 2397 */ 2398 public static final UnicodeBlock BRAHMI = 2399 new UnicodeBlock("BRAHMI"); 2400 2401 /** 2402 * Constant for the "Kaithi" Unicode character block. 
2403 * @since 1.7 2404 */ 2405 public static final UnicodeBlock KAITHI = 2406 new UnicodeBlock("KAITHI"); 2407 2408 /** 2409 * Constant for the "Cuneiform" Unicode character block. 2410 * @since 1.7 2411 */ 2412 public static final UnicodeBlock CUNEIFORM = 2413 new UnicodeBlock("CUNEIFORM"); 2414 2415 /** 2416 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2417 * character block. 2418 * @since 1.7 2419 */ 2420 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2421 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2422 "CUNEIFORM NUMBERS AND PUNCTUATION", 2423 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2424 2425 /** 2426 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2427 * @since 1.7 2428 */ 2429 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2430 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2431 "EGYPTIAN HIEROGLYPHS", 2432 "EGYPTIANHIEROGLYPHS"); 2433 2434 /** 2435 * Constant for the "Bamum Supplement" Unicode character block. 2436 * @since 1.7 2437 */ 2438 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2439 new UnicodeBlock("BAMUM_SUPPLEMENT", 2440 "BAMUM SUPPLEMENT", 2441 "BAMUMSUPPLEMENT"); 2442 2443 /** 2444 * Constant for the "Kana Supplement" Unicode character block. 2445 * @since 1.7 2446 */ 2447 public static final UnicodeBlock KANA_SUPPLEMENT = 2448 new UnicodeBlock("KANA_SUPPLEMENT", 2449 "KANA SUPPLEMENT", 2450 "KANASUPPLEMENT"); 2451 2452 /** 2453 * Constant for the "Ancient Greek Musical Notation" Unicode character 2454 * block. 2455 * @since 1.7 2456 */ 2457 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2458 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2459 "ANCIENT GREEK MUSICAL NOTATION", 2460 "ANCIENTGREEKMUSICALNOTATION"); 2461 2462 /** 2463 * Constant for the "Counting Rod Numerals" Unicode character block. 
2464 * @since 1.7 2465 */ 2466 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2467 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2468 "COUNTING ROD NUMERALS", 2469 "COUNTINGRODNUMERALS"); 2470 2471 /** 2472 * Constant for the "Mahjong Tiles" Unicode character block. 2473 * @since 1.7 2474 */ 2475 public static final UnicodeBlock MAHJONG_TILES = 2476 new UnicodeBlock("MAHJONG_TILES", 2477 "MAHJONG TILES", 2478 "MAHJONGTILES"); 2479 2480 /** 2481 * Constant for the "Domino Tiles" Unicode character block. 2482 * @since 1.7 2483 */ 2484 public static final UnicodeBlock DOMINO_TILES = 2485 new UnicodeBlock("DOMINO_TILES", 2486 "DOMINO TILES", 2487 "DOMINOTILES"); 2488 2489 /** 2490 * Constant for the "Playing Cards" Unicode character block. 2491 * @since 1.7 2492 */ 2493 public static final UnicodeBlock PLAYING_CARDS = 2494 new UnicodeBlock("PLAYING_CARDS", 2495 "PLAYING CARDS", 2496 "PLAYINGCARDS"); 2497 2498 /** 2499 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2500 * block. 2501 * @since 1.7 2502 */ 2503 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2504 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2505 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2506 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2507 2508 /** 2509 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2510 * block. 2511 * @since 1.7 2512 */ 2513 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2514 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2515 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2516 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2517 2518 /** 2519 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode 2520 * character block. 
2521 * @since 1.7 2522 */ 2523 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2524 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2525 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2526 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2527 2528 /** 2529 * Constant for the "Emoticons" Unicode character block. 2530 * @since 1.7 2531 */ 2532 public static final UnicodeBlock EMOTICONS = 2533 new UnicodeBlock("EMOTICONS"); 2534 2535 /** 2536 * Constant for the "Transport And Map Symbols" Unicode character block. 2537 * @since 1.7 2538 */ 2539 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2540 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2541 "TRANSPORT AND MAP SYMBOLS", 2542 "TRANSPORTANDMAPSYMBOLS"); 2543 2544 /** 2545 * Constant for the "Alchemical Symbols" Unicode character block. 2546 * @since 1.7 2547 */ 2548 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2549 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2550 "ALCHEMICAL SYMBOLS", 2551 "ALCHEMICALSYMBOLS"); 2552 2553 /** 2554 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2555 * character block. 2556 * @since 1.7 2557 */ 2558 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2559 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2560 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2561 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2562 2563 /** 2564 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2565 * character block. 2566 * @since 1.7 2567 */ 2568 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2569 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2570 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2571 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2572 2573 /** 2574 * Constant for the "Arabic Extended-A" Unicode character block. 
2575 * @since 1.8 2576 */ 2577 public static final UnicodeBlock ARABIC_EXTENDED_A = 2578 new UnicodeBlock("ARABIC_EXTENDED_A", 2579 "ARABIC EXTENDED-A", 2580 "ARABICEXTENDED-A"); 2581 2582 /** 2583 * Constant for the "Sundanese Supplement" Unicode character block. 2584 * @since 1.8 2585 */ 2586 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2587 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2588 "SUNDANESE SUPPLEMENT", 2589 "SUNDANESESUPPLEMENT"); 2590 2591 /** 2592 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2593 * @since 1.8 2594 */ 2595 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2596 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2597 "MEETEI MAYEK EXTENSIONS", 2598 "MEETEIMAYEKEXTENSIONS"); 2599 2600 /** 2601 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2602 * @since 1.8 2603 */ 2604 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2605 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2606 "MEROITIC HIEROGLYPHS", 2607 "MEROITICHIEROGLYPHS"); 2608 2609 /** 2610 * Constant for the "Meroitic Cursive" Unicode character block. 2611 * @since 1.8 2612 */ 2613 public static final UnicodeBlock MEROITIC_CURSIVE = 2614 new UnicodeBlock("MEROITIC_CURSIVE", 2615 "MEROITIC CURSIVE", 2616 "MEROITICCURSIVE"); 2617 2618 /** 2619 * Constant for the "Sora Sompeng" Unicode character block. 2620 * @since 1.8 2621 */ 2622 public static final UnicodeBlock SORA_SOMPENG = 2623 new UnicodeBlock("SORA_SOMPENG", 2624 "SORA SOMPENG", 2625 "SORASOMPENG"); 2626 2627 /** 2628 * Constant for the "Chakma" Unicode character block. 2629 * @since 1.8 2630 */ 2631 public static final UnicodeBlock CHAKMA = 2632 new UnicodeBlock("CHAKMA"); 2633 2634 /** 2635 * Constant for the "Sharada" Unicode character block. 2636 * @since 1.8 2637 */ 2638 public static final UnicodeBlock SHARADA = 2639 new UnicodeBlock("SHARADA"); 2640 2641 /** 2642 * Constant for the "Takri" Unicode character block. 
2643 * @since 1.8 2644 */ 2645 public static final UnicodeBlock TAKRI = 2646 new UnicodeBlock("TAKRI"); 2647 2648 /** 2649 * Constant for the "Miao" Unicode character block. 2650 * @since 1.8 2651 */ 2652 public static final UnicodeBlock MIAO = 2653 new UnicodeBlock("MIAO"); 2654 2655 /** 2656 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2657 * character block. 2658 * @since 1.8 2659 */ 2660 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2661 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2662 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2663 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2664 2665 /** 2666 * Constant for the "Combining Diacritical Marks Extended" Unicode 2667 * character block. 2668 * @since 9 2669 */ 2670 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2671 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", 2672 "COMBINING DIACRITICAL MARKS EXTENDED", 2673 "COMBININGDIACRITICALMARKSEXTENDED"); 2674 2675 /** 2676 * Constant for the "Myanmar Extended-B" Unicode character block. 2677 * @since 9 2678 */ 2679 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2680 new UnicodeBlock("MYANMAR_EXTENDED_B", 2681 "MYANMAR EXTENDED-B", 2682 "MYANMAREXTENDED-B"); 2683 2684 /** 2685 * Constant for the "Latin Extended-E" Unicode character block. 2686 * @since 9 2687 */ 2688 public static final UnicodeBlock LATIN_EXTENDED_E = 2689 new UnicodeBlock("LATIN_EXTENDED_E", 2690 "LATIN EXTENDED-E", 2691 "LATINEXTENDED-E"); 2692 2693 /** 2694 * Constant for the "Coptic Epact Numbers" Unicode character block. 2695 * @since 9 2696 */ 2697 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2698 new UnicodeBlock("COPTIC_EPACT_NUMBERS", 2699 "COPTIC EPACT NUMBERS", 2700 "COPTICEPACTNUMBERS"); 2701 2702 /** 2703 * Constant for the "Old Permic" Unicode character block. 
2704 * @since 9 2705 */ 2706 public static final UnicodeBlock OLD_PERMIC = 2707 new UnicodeBlock("OLD_PERMIC", 2708 "OLD PERMIC", 2709 "OLDPERMIC"); 2710 2711 /** 2712 * Constant for the "Elbasan" Unicode character block. 2713 * @since 9 2714 */ 2715 public static final UnicodeBlock ELBASAN = 2716 new UnicodeBlock("ELBASAN"); 2717 2718 /** 2719 * Constant for the "Caucasian Albanian" Unicode character block. 2720 * @since 9 2721 */ 2722 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2723 new UnicodeBlock("CAUCASIAN_ALBANIAN", 2724 "CAUCASIAN ALBANIAN", 2725 "CAUCASIANALBANIAN"); 2726 2727 /** 2728 * Constant for the "Linear A" Unicode character block. 2729 * @since 9 2730 */ 2731 public static final UnicodeBlock LINEAR_A = 2732 new UnicodeBlock("LINEAR_A", 2733 "LINEAR A", 2734 "LINEARA"); 2735 2736 /** 2737 * Constant for the "Palmyrene" Unicode character block. 2738 * @since 9 2739 */ 2740 public static final UnicodeBlock PALMYRENE = 2741 new UnicodeBlock("PALMYRENE"); 2742 2743 /** 2744 * Constant for the "Nabataean" Unicode character block. 2745 * @since 9 2746 */ 2747 public static final UnicodeBlock NABATAEAN = 2748 new UnicodeBlock("NABATAEAN"); 2749 2750 /** 2751 * Constant for the "Old North Arabian" Unicode character block. 2752 * @since 9 2753 */ 2754 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2755 new UnicodeBlock("OLD_NORTH_ARABIAN", 2756 "OLD NORTH ARABIAN", 2757 "OLDNORTHARABIAN"); 2758 2759 /** 2760 * Constant for the "Manichaean" Unicode character block. 2761 * @since 9 2762 */ 2763 public static final UnicodeBlock MANICHAEAN = 2764 new UnicodeBlock("MANICHAEAN"); 2765 2766 /** 2767 * Constant for the "Psalter Pahlavi" Unicode character block. 2768 * @since 9 2769 */ 2770 public static final UnicodeBlock PSALTER_PAHLAVI = 2771 new UnicodeBlock("PSALTER_PAHLAVI", 2772 "PSALTER PAHLAVI", 2773 "PSALTERPAHLAVI"); 2774 2775 /** 2776 * Constant for the "Mahajani" Unicode character block. 
2777 * @since 9 2778 */ 2779 public static final UnicodeBlock MAHAJANI = 2780 new UnicodeBlock("MAHAJANI"); 2781 2782 /** 2783 * Constant for the "Sinhala Archaic Numbers" Unicode character block. 2784 * @since 9 2785 */ 2786 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2787 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", 2788 "SINHALA ARCHAIC NUMBERS", 2789 "SINHALAARCHAICNUMBERS"); 2790 2791 /** 2792 * Constant for the "Khojki" Unicode character block. 2793 * @since 9 2794 */ 2795 public static final UnicodeBlock KHOJKI = 2796 new UnicodeBlock("KHOJKI"); 2797 2798 /** 2799 * Constant for the "Khudawadi" Unicode character block. 2800 * @since 9 2801 */ 2802 public static final UnicodeBlock KHUDAWADI = 2803 new UnicodeBlock("KHUDAWADI"); 2804 2805 /** 2806 * Constant for the "Grantha" Unicode character block. 2807 * @since 9 2808 */ 2809 public static final UnicodeBlock GRANTHA = 2810 new UnicodeBlock("GRANTHA"); 2811 2812 /** 2813 * Constant for the "Tirhuta" Unicode character block. 2814 * @since 9 2815 */ 2816 public static final UnicodeBlock TIRHUTA = 2817 new UnicodeBlock("TIRHUTA"); 2818 2819 /** 2820 * Constant for the "Siddham" Unicode character block. 2821 * @since 9 2822 */ 2823 public static final UnicodeBlock SIDDHAM = 2824 new UnicodeBlock("SIDDHAM"); 2825 2826 /** 2827 * Constant for the "Modi" Unicode character block. 2828 * @since 9 2829 */ 2830 public static final UnicodeBlock MODI = 2831 new UnicodeBlock("MODI"); 2832 2833 /** 2834 * Constant for the "Warang Citi" Unicode character block. 2835 * @since 9 2836 */ 2837 public static final UnicodeBlock WARANG_CITI = 2838 new UnicodeBlock("WARANG_CITI", 2839 "WARANG CITI", 2840 "WARANGCITI"); 2841 2842 /** 2843 * Constant for the "Pau Cin Hau" Unicode character block. 2844 * @since 9 2845 */ 2846 public static final UnicodeBlock PAU_CIN_HAU = 2847 new UnicodeBlock("PAU_CIN_HAU", 2848 "PAU CIN HAU", 2849 "PAUCINHAU"); 2850 2851 /** 2852 * Constant for the "Mro" Unicode character block. 
2853 * @since 9 2854 */ 2855 public static final UnicodeBlock MRO = 2856 new UnicodeBlock("MRO"); 2857 2858 /** 2859 * Constant for the "Bassa Vah" Unicode character block. 2860 * @since 9 2861 */ 2862 public static final UnicodeBlock BASSA_VAH = 2863 new UnicodeBlock("BASSA_VAH", 2864 "BASSA VAH", 2865 "BASSAVAH"); 2866 2867 /** 2868 * Constant for the "Pahawh Hmong" Unicode character block. 2869 * @since 9 2870 */ 2871 public static final UnicodeBlock PAHAWH_HMONG = 2872 new UnicodeBlock("PAHAWH_HMONG", 2873 "PAHAWH HMONG", 2874 "PAHAWHHMONG"); 2875 2876 /** 2877 * Constant for the "Duployan" Unicode character block. 2878 * @since 9 2879 */ 2880 public static final UnicodeBlock DUPLOYAN = 2881 new UnicodeBlock("DUPLOYAN"); 2882 2883 /** 2884 * Constant for the "Shorthand Format Controls" Unicode character block. 2885 * @since 9 2886 */ 2887 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2888 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", 2889 "SHORTHAND FORMAT CONTROLS", 2890 "SHORTHANDFORMATCONTROLS"); 2891 2892 /** 2893 * Constant for the "Mende Kikakui" Unicode character block. 2894 * @since 9 2895 */ 2896 public static final UnicodeBlock MENDE_KIKAKUI = 2897 new UnicodeBlock("MENDE_KIKAKUI", 2898 "MENDE KIKAKUI", 2899 "MENDEKIKAKUI"); 2900 2901 /** 2902 * Constant for the "Ornamental Dingbats" Unicode character block. 2903 * @since 9 2904 */ 2905 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2906 new UnicodeBlock("ORNAMENTAL_DINGBATS", 2907 "ORNAMENTAL DINGBATS", 2908 "ORNAMENTALDINGBATS"); 2909 2910 /** 2911 * Constant for the "Geometric Shapes Extended" Unicode character block. 2912 * @since 9 2913 */ 2914 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2915 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", 2916 "GEOMETRIC SHAPES EXTENDED", 2917 "GEOMETRICSHAPESEXTENDED"); 2918 2919 /** 2920 * Constant for the "Supplemental Arrows-C" Unicode character block. 
2921 * @since 9 2922 */ 2923 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2924 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 2925 "SUPPLEMENTAL ARROWS-C", 2926 "SUPPLEMENTALARROWS-C"); 2927 2928 /** 2929 * Constant for the "Cherokee Supplement" Unicode character block. 2930 * @since 9 2931 */ 2932 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2933 new UnicodeBlock("CHEROKEE_SUPPLEMENT", 2934 "CHEROKEE SUPPLEMENT", 2935 "CHEROKEESUPPLEMENT"); 2936 2937 /** 2938 * Constant for the "Hatran" Unicode character block. 2939 * @since 9 2940 */ 2941 public static final UnicodeBlock HATRAN = 2942 new UnicodeBlock("HATRAN"); 2943 2944 /** 2945 * Constant for the "Old Hungarian" Unicode character block. 2946 * @since 9 2947 */ 2948 public static final UnicodeBlock OLD_HUNGARIAN = 2949 new UnicodeBlock("OLD_HUNGARIAN", 2950 "OLD HUNGARIAN", 2951 "OLDHUNGARIAN"); 2952 2953 /** 2954 * Constant for the "Multani" Unicode character block. 2955 * @since 9 2956 */ 2957 public static final UnicodeBlock MULTANI = 2958 new UnicodeBlock("MULTANI"); 2959 2960 /** 2961 * Constant for the "Ahom" Unicode character block. 2962 * @since 9 2963 */ 2964 public static final UnicodeBlock AHOM = 2965 new UnicodeBlock("AHOM"); 2966 2967 /** 2968 * Constant for the "Early Dynastic Cuneiform" Unicode character block. 2969 * @since 9 2970 */ 2971 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2972 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", 2973 "EARLY DYNASTIC CUNEIFORM", 2974 "EARLYDYNASTICCUNEIFORM"); 2975 2976 /** 2977 * Constant for the "Anatolian Hieroglyphs" Unicode character block. 2978 * @since 9 2979 */ 2980 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2981 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", 2982 "ANATOLIAN HIEROGLYPHS", 2983 "ANATOLIANHIEROGLYPHS"); 2984 2985 /** 2986 * Constant for the "Sutton SignWriting" Unicode character block. 
2987 * @since 9 2988 */ 2989 public static final UnicodeBlock SUTTON_SIGNWRITING = 2990 new UnicodeBlock("SUTTON_SIGNWRITING", 2991 "SUTTON SIGNWRITING", 2992 "SUTTONSIGNWRITING"); 2993 2994 /** 2995 * Constant for the "Supplemental Symbols and Pictographs" Unicode 2996 * character block. 2997 * @since 9 2998 */ 2999 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 3000 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 3001 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS", 3002 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS"); 3003 3004 /** 3005 * Constant for the "CJK Unified Ideographs Extension E" Unicode 3006 * character block. 3007 * @since 9 3008 */ 3009 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 3010 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 3011 "CJK UNIFIED IDEOGRAPHS EXTENSION E", 3012 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE"); 3013 3014 /** 3015 * Constant for the "Syriac Supplement" Unicode 3016 * character block. 3017 * @since 11 3018 */ 3019 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 3020 new UnicodeBlock("SYRIAC_SUPPLEMENT", 3021 "SYRIAC SUPPLEMENT", 3022 "SYRIACSUPPLEMENT"); 3023 3024 /** 3025 * Constant for the "Cyrillic Extended-C" Unicode 3026 * character block. 3027 * @since 11 3028 */ 3029 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 3030 new UnicodeBlock("CYRILLIC_EXTENDED_C", 3031 "CYRILLIC EXTENDED-C", 3032 "CYRILLICEXTENDED-C"); 3033 3034 /** 3035 * Constant for the "Osage" Unicode 3036 * character block. 3037 * @since 11 3038 */ 3039 public static final UnicodeBlock OSAGE = 3040 new UnicodeBlock("OSAGE"); 3041 3042 /** 3043 * Constant for the "Newa" Unicode 3044 * character block. 3045 * @since 11 3046 */ 3047 public static final UnicodeBlock NEWA = 3048 new UnicodeBlock("NEWA"); 3049 3050 /** 3051 * Constant for the "Mongolian Supplement" Unicode 3052 * character block. 
3053 * @since 11 3054 */ 3055 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 3056 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", 3057 "MONGOLIAN SUPPLEMENT", 3058 "MONGOLIANSUPPLEMENT"); 3059 3060 /** 3061 * Constant for the "Marchen" Unicode 3062 * character block. 3063 * @since 11 3064 */ 3065 public static final UnicodeBlock MARCHEN = 3066 new UnicodeBlock("MARCHEN"); 3067 3068 /** 3069 * Constant for the "Ideographic Symbols and Punctuation" Unicode 3070 * character block. 3071 * @since 11 3072 */ 3073 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 3074 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", 3075 "IDEOGRAPHIC SYMBOLS AND PUNCTUATION", 3076 "IDEOGRAPHICSYMBOLSANDPUNCTUATION"); 3077 3078 /** 3079 * Constant for the "Tangut" Unicode 3080 * character block. 3081 * @since 11 3082 */ 3083 public static final UnicodeBlock TANGUT = 3084 new UnicodeBlock("TANGUT"); 3085 3086 /** 3087 * Constant for the "Tangut Components" Unicode 3088 * character block. 3089 * @since 11 3090 */ 3091 public static final UnicodeBlock TANGUT_COMPONENTS = 3092 new UnicodeBlock("TANGUT_COMPONENTS", 3093 "TANGUT COMPONENTS", 3094 "TANGUTCOMPONENTS"); 3095 3096 /** 3097 * Constant for the "Kana Extended-A" Unicode 3098 * character block. 3099 * @since 11 3100 */ 3101 public static final UnicodeBlock KANA_EXTENDED_A = 3102 new UnicodeBlock("KANA_EXTENDED_A", 3103 "KANA EXTENDED-A", 3104 "KANAEXTENDED-A"); 3105 /** 3106 * Constant for the "Glagolitic Supplement" Unicode 3107 * character block. 3108 * @since 11 3109 */ 3110 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 3111 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", 3112 "GLAGOLITIC SUPPLEMENT", 3113 "GLAGOLITICSUPPLEMENT"); 3114 /** 3115 * Constant for the "Adlam" Unicode 3116 * character block. 3117 * @since 11 3118 */ 3119 public static final UnicodeBlock ADLAM = 3120 new UnicodeBlock("ADLAM"); 3121 3122 /** 3123 * Constant for the "Masaram Gondi" Unicode 3124 * character block. 
3125 * @since 11 3126 */ 3127 public static final UnicodeBlock MASARAM_GONDI = 3128 new UnicodeBlock("MASARAM_GONDI", 3129 "MASARAM GONDI", 3130 "MASARAMGONDI"); 3131 3132 /** 3133 * Constant for the "Zanabazar Square" Unicode 3134 * character block. 3135 * @since 11 3136 */ 3137 public static final UnicodeBlock ZANABAZAR_SQUARE = 3138 new UnicodeBlock("ZANABAZAR_SQUARE", 3139 "ZANABAZAR SQUARE", 3140 "ZANABAZARSQUARE"); 3141 3142 /** 3143 * Constant for the "Nushu" Unicode 3144 * character block. 3145 * @since 11 3146 */ 3147 public static final UnicodeBlock NUSHU = 3148 new UnicodeBlock("NUSHU"); 3149 3150 /** 3151 * Constant for the "Soyombo" Unicode 3152 * character block. 3153 * @since 11 3154 */ 3155 public static final UnicodeBlock SOYOMBO = 3156 new UnicodeBlock("SOYOMBO"); 3157 3158 /** 3159 * Constant for the "Bhaiksuki" Unicode 3160 * character block. 3161 * @since 11 3162 */ 3163 public static final UnicodeBlock BHAIKSUKI = 3164 new UnicodeBlock("BHAIKSUKI"); 3165 3166 /** 3167 * Constant for the "CJK Unified Ideographs Extension F" Unicode 3168 * character block. 3169 * @since 11 3170 */ 3171 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 3172 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", 3173 "CJK UNIFIED IDEOGRAPHS EXTENSION F", 3174 "CJKUNIFIEDIDEOGRAPHSEXTENSIONF"); 3175 /** 3176 * Constant for the "Georgian Extended" Unicode 3177 * character block. 3178 * @since 12 3179 */ 3180 public static final UnicodeBlock GEORGIAN_EXTENDED = 3181 new UnicodeBlock("GEORGIAN_EXTENDED", 3182 "GEORGIAN EXTENDED", 3183 "GEORGIANEXTENDED"); 3184 3185 /** 3186 * Constant for the "Hanifi Rohingya" Unicode 3187 * character block. 3188 * @since 12 3189 */ 3190 public static final UnicodeBlock HANIFI_ROHINGYA = 3191 new UnicodeBlock("HANIFI_ROHINGYA", 3192 "HANIFI ROHINGYA", 3193 "HANIFIROHINGYA"); 3194 3195 /** 3196 * Constant for the "Old Sogdian" Unicode 3197 * character block. 
3198 * @since 12 3199 */ 3200 public static final UnicodeBlock OLD_SOGDIAN = 3201 new UnicodeBlock("OLD_SOGDIAN", 3202 "OLD SOGDIAN", 3203 "OLDSOGDIAN"); 3204 3205 /** 3206 * Constant for the "Sogdian" Unicode 3207 * character block. 3208 * @since 12 3209 */ 3210 public static final UnicodeBlock SOGDIAN = 3211 new UnicodeBlock("SOGDIAN"); 3212 3213 /** 3214 * Constant for the "Dogra" Unicode 3215 * character block. 3216 * @since 12 3217 */ 3218 public static final UnicodeBlock DOGRA = 3219 new UnicodeBlock("DOGRA"); 3220 3221 /** 3222 * Constant for the "Gunjala Gondi" Unicode 3223 * character block. 3224 * @since 12 3225 */ 3226 public static final UnicodeBlock GUNJALA_GONDI = 3227 new UnicodeBlock("GUNJALA_GONDI", 3228 "GUNJALA GONDI", 3229 "GUNJALAGONDI"); 3230 3231 /** 3232 * Constant for the "Makasar" Unicode 3233 * character block. 3234 * @since 12 3235 */ 3236 public static final UnicodeBlock MAKASAR = 3237 new UnicodeBlock("MAKASAR"); 3238 3239 /** 3240 * Constant for the "Medefaidrin" Unicode 3241 * character block. 3242 * @since 12 3243 */ 3244 public static final UnicodeBlock MEDEFAIDRIN = 3245 new UnicodeBlock("MEDEFAIDRIN"); 3246 3247 /** 3248 * Constant for the "Mayan Numerals" Unicode 3249 * character block. 3250 * @since 12 3251 */ 3252 public static final UnicodeBlock MAYAN_NUMERALS = 3253 new UnicodeBlock("MAYAN_NUMERALS", 3254 "MAYAN NUMERALS", 3255 "MAYANNUMERALS"); 3256 3257 /** 3258 * Constant for the "Indic Siyaq Numbers" Unicode 3259 * character block. 3260 * @since 12 3261 */ 3262 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS = 3263 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", 3264 "INDIC SIYAQ NUMBERS", 3265 "INDICSIYAQNUMBERS"); 3266 3267 /** 3268 * Constant for the "Chess Symbols" Unicode 3269 * character block. 
3270 * @since 12 3271 */ 3272 public static final UnicodeBlock CHESS_SYMBOLS = 3273 new UnicodeBlock("CHESS_SYMBOLS", 3274 "CHESS SYMBOLS", 3275 "CHESSSYMBOLS"); 3276 3277 /** 3278 * Constant for the "Elymaic" Unicode 3279 * character block. 3280 * @since 13 3281 */ 3282 public static final UnicodeBlock ELYMAIC = 3283 new UnicodeBlock("ELYMAIC"); 3284 3285 /** 3286 * Constant for the "Nandinagari" Unicode 3287 * character block. 3288 * @since 13 3289 */ 3290 public static final UnicodeBlock NANDINAGARI = 3291 new UnicodeBlock("NANDINAGARI"); 3292 3293 /** 3294 * Constant for the "Tamil Supplement" Unicode 3295 * character block. 3296 * @since 13 3297 */ 3298 public static final UnicodeBlock TAMIL_SUPPLEMENT = 3299 new UnicodeBlock("TAMIL_SUPPLEMENT", 3300 "TAMIL SUPPLEMENT", 3301 "TAMILSUPPLEMENT"); 3302 3303 /** 3304 * Constant for the "Egyptian Hieroglyph Format Controls" Unicode 3305 * character block. 3306 * @since 13 3307 */ 3308 public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 3309 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", 3310 "EGYPTIAN HIEROGLYPH FORMAT CONTROLS", 3311 "EGYPTIANHIEROGLYPHFORMATCONTROLS"); 3312 3313 /** 3314 * Constant for the "Small Kana Extension" Unicode 3315 * character block. 3316 * @since 13 3317 */ 3318 public static final UnicodeBlock SMALL_KANA_EXTENSION = 3319 new UnicodeBlock("SMALL_KANA_EXTENSION", 3320 "SMALL KANA EXTENSION", 3321 "SMALLKANAEXTENSION"); 3322 3323 /** 3324 * Constant for the "Nyiakeng Puachue Hmong" Unicode 3325 * character block. 3326 * @since 13 3327 */ 3328 public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG = 3329 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", 3330 "NYIAKENG PUACHUE HMONG", 3331 "NYIAKENGPUACHUEHMONG"); 3332 3333 /** 3334 * Constant for the "Wancho" Unicode 3335 * character block. 
3336 * @since 13 3337 */ 3338 public static final UnicodeBlock WANCHO = 3339 new UnicodeBlock("WANCHO"); 3340 3341 /** 3342 * Constant for the "Ottoman Siyaq Numbers" Unicode 3343 * character block. 3344 * @since 13 3345 */ 3346 public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS = 3347 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", 3348 "OTTOMAN SIYAQ NUMBERS", 3349 "OTTOMANSIYAQNUMBERS"); 3350 3351 /** 3352 * Constant for the "Symbols and Pictographs Extended-A" Unicode 3353 * character block. 3354 * @since 13 3355 */ 3356 public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 3357 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", 3358 "SYMBOLS AND PICTOGRAPHS EXTENDED-A", 3359 "SYMBOLSANDPICTOGRAPHSEXTENDED-A"); 3360 3361 /** 3362 * Constant for the "Yezidi" Unicode 3363 * character block. 3364 * @since 15 3365 */ 3366 public static final UnicodeBlock YEZIDI = 3367 new UnicodeBlock("YEZIDI"); 3368 3369 /** 3370 * Constant for the "Chorasmian" Unicode 3371 * character block. 3372 * @since 15 3373 */ 3374 public static final UnicodeBlock CHORASMIAN = 3375 new UnicodeBlock("CHORASMIAN"); 3376 3377 /** 3378 * Constant for the "Dives Akuru" Unicode 3379 * character block. 3380 * @since 15 3381 */ 3382 public static final UnicodeBlock DIVES_AKURU = 3383 new UnicodeBlock("DIVES_AKURU", 3384 "DIVES AKURU", 3385 "DIVESAKURU"); 3386 3387 /** 3388 * Constant for the "Lisu Supplement" Unicode 3389 * character block. 3390 * @since 15 3391 */ 3392 public static final UnicodeBlock LISU_SUPPLEMENT = 3393 new UnicodeBlock("LISU_SUPPLEMENT", 3394 "LISU SUPPLEMENT", 3395 "LISUSUPPLEMENT"); 3396 3397 /** 3398 * Constant for the "Khitan Small Script" Unicode 3399 * character block. 3400 * @since 15 3401 */ 3402 public static final UnicodeBlock KHITAN_SMALL_SCRIPT = 3403 new UnicodeBlock("KHITAN_SMALL_SCRIPT", 3404 "KHITAN SMALL SCRIPT", 3405 "KHITANSMALLSCRIPT"); 3406 3407 /** 3408 * Constant for the "Tangut Supplement" Unicode 3409 * character block. 
3410 * @since 15 3411 */ 3412 public static final UnicodeBlock TANGUT_SUPPLEMENT = 3413 new UnicodeBlock("TANGUT_SUPPLEMENT", 3414 "TANGUT SUPPLEMENT", 3415 "TANGUTSUPPLEMENT"); 3416 3417 /** 3418 * Constant for the "Symbols for Legacy Computing" Unicode 3419 * character block. 3420 * @since 15 3421 */ 3422 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING = 3423 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", 3424 "SYMBOLS FOR LEGACY COMPUTING", 3425 "SYMBOLSFORLEGACYCOMPUTING"); 3426 3427 /** 3428 * Constant for the "CJK Unified Ideographs Extension G" Unicode 3429 * character block. 3430 * @since 15 3431 */ 3432 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 3433 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G", 3434 "CJK UNIFIED IDEOGRAPHS EXTENSION G", 3435 "CJKUNIFIEDIDEOGRAPHSEXTENSIONG"); 3436 3437 /** 3438 * Constant for the "Arabic Extended-B" Unicode 3439 * character block. 3440 * @since 19 3441 */ 3442 public static final UnicodeBlock ARABIC_EXTENDED_B = 3443 new UnicodeBlock("ARABIC_EXTENDED_B", 3444 "ARABIC EXTENDED-B", 3445 "ARABICEXTENDED-B"); 3446 3447 /** 3448 * Constant for the "Vithkuqi" Unicode 3449 * character block. 3450 * @since 19 3451 */ 3452 public static final UnicodeBlock VITHKUQI = 3453 new UnicodeBlock("VITHKUQI"); 3454 3455 /** 3456 * Constant for the "Latin Extended-F" Unicode 3457 * character block. 3458 * @since 19 3459 */ 3460 public static final UnicodeBlock LATIN_EXTENDED_F = 3461 new UnicodeBlock("LATIN_EXTENDED_F", 3462 "LATIN EXTENDED-F", 3463 "LATINEXTENDED-F"); 3464 3465 /** 3466 * Constant for the "Old Uyghur" Unicode 3467 * character block. 3468 * @since 19 3469 */ 3470 public static final UnicodeBlock OLD_UYGHUR = 3471 new UnicodeBlock("OLD_UYGHUR", 3472 "OLD UYGHUR", 3473 "OLDUYGHUR"); 3474 3475 /** 3476 * Constant for the "Unified Canadian Aboriginal Syllabics Extended-A" Unicode 3477 * character block. 
3478 * @since 19 3479 */ 3480 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A = 3481 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A", 3482 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED-A", 3483 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED-A"); 3484 3485 /** 3486 * Constant for the "Cypro-Minoan" Unicode 3487 * character block. 3488 * @since 19 3489 */ 3490 public static final UnicodeBlock CYPRO_MINOAN = 3491 new UnicodeBlock("CYPRO_MINOAN", 3492 "CYPRO-MINOAN", 3493 "CYPRO-MINOAN"); 3494 3495 /** 3496 * Constant for the "Tangsa" Unicode 3497 * character block. 3498 * @since 19 3499 */ 3500 public static final UnicodeBlock TANGSA = 3501 new UnicodeBlock("TANGSA"); 3502 3503 /** 3504 * Constant for the "Kana Extended-B" Unicode 3505 * character block. 3506 * @since 19 3507 */ 3508 public static final UnicodeBlock KANA_EXTENDED_B = 3509 new UnicodeBlock("KANA_EXTENDED_B", 3510 "KANA EXTENDED-B", 3511 "KANAEXTENDED-B"); 3512 3513 /** 3514 * Constant for the "Znamenny Musical Notation" Unicode 3515 * character block. 3516 * @since 19 3517 */ 3518 public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION = 3519 new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION", 3520 "ZNAMENNY MUSICAL NOTATION", 3521 "ZNAMENNYMUSICALNOTATION"); 3522 3523 /** 3524 * Constant for the "Latin Extended-G" Unicode 3525 * character block. 3526 * @since 19 3527 */ 3528 public static final UnicodeBlock LATIN_EXTENDED_G = 3529 new UnicodeBlock("LATIN_EXTENDED_G", 3530 "LATIN EXTENDED-G", 3531 "LATINEXTENDED-G"); 3532 3533 /** 3534 * Constant for the "Toto" Unicode 3535 * character block. 3536 * @since 19 3537 */ 3538 public static final UnicodeBlock TOTO = 3539 new UnicodeBlock("TOTO"); 3540 3541 /** 3542 * Constant for the "Ethiopic Extended-B" Unicode 3543 * character block. 
3544 * @since 19 3545 */ 3546 public static final UnicodeBlock ETHIOPIC_EXTENDED_B = 3547 new UnicodeBlock("ETHIOPIC_EXTENDED_B", 3548 "ETHIOPIC EXTENDED-B", 3549 "ETHIOPICEXTENDED-B"); 3550 3551 /** 3552 * Constant for the "Arabic Extended-C" Unicode 3553 * character block. 3554 * @since 20 3555 */ 3556 public static final UnicodeBlock ARABIC_EXTENDED_C = 3557 new UnicodeBlock("ARABIC_EXTENDED_C", 3558 "ARABIC EXTENDED-C", 3559 "ARABICEXTENDED-C"); 3560 3561 /** 3562 * Constant for the "Devanagari Extended-A" Unicode 3563 * character block. 3564 * @since 20 3565 */ 3566 public static final UnicodeBlock DEVANAGARI_EXTENDED_A = 3567 new UnicodeBlock("DEVANAGARI_EXTENDED_A", 3568 "DEVANAGARI EXTENDED-A", 3569 "DEVANAGARIEXTENDED-A"); 3570 3571 /** 3572 * Constant for the "Kawi" Unicode 3573 * character block. 3574 * @since 20 3575 */ 3576 public static final UnicodeBlock KAWI = 3577 new UnicodeBlock("KAWI"); 3578 3579 /** 3580 * Constant for the "Kaktovik Numerals" Unicode 3581 * character block. 3582 * @since 20 3583 */ 3584 public static final UnicodeBlock KAKTOVIK_NUMERALS = 3585 new UnicodeBlock("KAKTOVIK_NUMERALS", 3586 "KAKTOVIK NUMERALS", 3587 "KAKTOVIKNUMERALS"); 3588 3589 /** 3590 * Constant for the "Cyrillic Extended-D" Unicode 3591 * character block. 3592 * @since 20 3593 */ 3594 public static final UnicodeBlock CYRILLIC_EXTENDED_D = 3595 new UnicodeBlock("CYRILLIC_EXTENDED_D", 3596 "CYRILLIC EXTENDED-D", 3597 "CYRILLICEXTENDED-D"); 3598 3599 /** 3600 * Constant for the "Nag Mundari" Unicode 3601 * character block. 3602 * @since 20 3603 */ 3604 public static final UnicodeBlock NAG_MUNDARI = 3605 new UnicodeBlock("NAG_MUNDARI", 3606 "NAG MUNDARI", 3607 "NAGMUNDARI"); 3608 3609 /** 3610 * Constant for the "CJK Unified Ideographs Extension H" Unicode 3611 * character block. 
3612 * @since 20 3613 */ 3614 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H = 3615 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H", 3616 "CJK UNIFIED IDEOGRAPHS EXTENSION H", 3617 "CJKUNIFIEDIDEOGRAPHSEXTENSIONH"); 3618 3619 /** 3620 * Constant for the "CJK Unified Ideographs Extension I" Unicode 3621 * character block. 3622 * @since 22 3623 */ 3624 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I = 3625 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I", 3626 "CJK UNIFIED IDEOGRAPHS EXTENSION I", 3627 "CJKUNIFIEDIDEOGRAPHSEXTENSIONI"); 3628 3629 /** 3630 * Constant for the "Todhri" Unicode 3631 * character block. 3632 * @since 24 3633 */ 3634 public static final UnicodeBlock TODHRI = 3635 new UnicodeBlock("TODHRI"); 3636 3637 /** 3638 * Constant for the "Garay" Unicode 3639 * character block. 3640 * @since 24 3641 */ 3642 public static final UnicodeBlock GARAY = 3643 new UnicodeBlock("GARAY"); 3644 3645 /** 3646 * Constant for the "Tulu-Tigalari" Unicode 3647 * character block. 3648 * @since 24 3649 */ 3650 public static final UnicodeBlock TULU_TIGALARI = 3651 new UnicodeBlock("TULU_TIGALARI", 3652 "TULU-TIGALARI"); 3653 3654 /** 3655 * Constant for the "Myanmar Extended-C" Unicode 3656 * character block. 3657 * @since 24 3658 */ 3659 public static final UnicodeBlock MYANMAR_EXTENDED_C = 3660 new UnicodeBlock("MYANMAR_EXTENDED_C", 3661 "MYANMAR EXTENDED-C", 3662 "MYANMAREXTENDED-C"); 3663 3664 /** 3665 * Constant for the "Sunuwar" Unicode 3666 * character block. 3667 * @since 24 3668 */ 3669 public static final UnicodeBlock SUNUWAR = 3670 new UnicodeBlock("SUNUWAR"); 3671 3672 /** 3673 * Constant for the "Egyptian Hieroglyphs Extended-A" Unicode 3674 * character block. 
3675 * @since 24 3676 */ 3677 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS_EXTENDED_A = 3678 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS_EXTENDED_A", 3679 "EGYPTIAN HIEROGLYPHS EXTENDED-A", 3680 "EGYPTIANHIEROGLYPHSEXTENDED-A"); 3681 3682 /** 3683 * Constant for the "Gurung Khema" Unicode 3684 * character block. 3685 * @since 24 3686 */ 3687 public static final UnicodeBlock GURUNG_KHEMA = 3688 new UnicodeBlock("GURUNG_KHEMA", 3689 "GURUNG KHEMA", 3690 "GURUNGKHEMA"); 3691 3692 /** 3693 * Constant for the "Kirat Rai" Unicode 3694 * character block. 3695 * @since 24 3696 */ 3697 public static final UnicodeBlock KIRAT_RAI = 3698 new UnicodeBlock("KIRAT_RAI", 3699 "KIRAT RAI", 3700 "KIRATRAI"); 3701 3702 /** 3703 * Constant for the "Symbols for Legacy Computing Supplement" Unicode 3704 * character block. 3705 * @since 24 3706 */ 3707 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT = 3708 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT", 3709 "SYMBOLS FOR LEGACY COMPUTING SUPPLEMENT", 3710 "SYMBOLSFORLEGACYCOMPUTINGSUPPLEMENT"); 3711 3712 /** 3713 * Constant for the "Ol Onal" Unicode 3714 * character block. 3715 * @since 24 3716 */ 3717 public static final UnicodeBlock OL_ONAL = 3718 new UnicodeBlock("OL_ONAL", 3719 "OL ONAL", 3720 "OLONAL"); 3721 3722 /** 3723 * Constant for the "Sidetic" Unicode 3724 * character block. 3725 * @since 26 3726 */ 3727 public static final UnicodeBlock SIDETIC = 3728 new UnicodeBlock("SIDETIC"); 3729 3730 /** 3731 * Constant for the "Sharada Supplement" Unicode 3732 * character block. 3733 * @since 26 3734 */ 3735 public static final UnicodeBlock SHARADA_SUPPLEMENT = 3736 new UnicodeBlock("SHARADA_SUPPLEMENT", 3737 "SHARADA SUPPLEMENT", 3738 "SHARADASUPPLEMENT"); 3739 3740 /** 3741 * Constant for the "Tolong Siki" Unicode 3742 * character block. 
3743 * @since 26 3744 */ 3745 public static final UnicodeBlock TOLONG_SIKI = 3746 new UnicodeBlock("TOLONG_SIKI", 3747 "TOLONG SIKI", 3748 "TOLONGSIKI"); 3749 3750 /** 3751 * Constant for the "Beria Erfe" Unicode 3752 * character block. 3753 * @since 26 3754 */ 3755 public static final UnicodeBlock BERIA_ERFE = 3756 new UnicodeBlock("BERIA_ERFE", 3757 "BERIA ERFE", 3758 "BERIAERFE"); 3759 3760 /** 3761 * Constant for the "Tangut Components Supplement" Unicode 3762 * character block. 3763 * @since 26 3764 */ 3765 public static final UnicodeBlock TANGUT_COMPONENTS_SUPPLEMENT = 3766 new UnicodeBlock("TANGUT_COMPONENTS_SUPPLEMENT", 3767 "TANGUT COMPONENTS SUPPLEMENT", 3768 "TANGUTCOMPONENTSSUPPLEMENT"); 3769 3770 /** 3771 * Constant for the "Miscellaneous Symbols Supplement" Unicode 3772 * character block. 3773 * @since 26 3774 */ 3775 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_SUPPLEMENT = 3776 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_SUPPLEMENT", 3777 "MISCELLANEOUS SYMBOLS SUPPLEMENT", 3778 "MISCELLANEOUSSYMBOLSSUPPLEMENT"); 3779 3780 /** 3781 * Constant for the "Tai Yo" Unicode 3782 * character block. 3783 * @since 26 3784 */ 3785 public static final UnicodeBlock TAI_YO = 3786 new UnicodeBlock("TAI_YO", 3787 "TAI YO", 3788 "TAIYO"); 3789 3790 /** 3791 * Constant for the "CJK Unified Ideographs Extension J" Unicode 3792 * character block. 
* @since 26
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_J =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_J",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION J",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONJ");


        /**
         * Table of range start code points, listed in ascending order.
         * Each entry opens either a named block (the trailing comment gives
         * its {@code first..last} range and block name) or a gap marked
         * {@code unassigned}. The ranges are contiguous: every range ends on
         * the code point just before the next entry, and the final entry's
         * range ends at 10FFFF.
         * <p>
         * Entries pair up by index with the {@code blocks} array: a named
         * range lines up with its {@code UnicodeBlock} constant and an
         * unassigned gap lines up with {@code null}, so a row added or
         * removed here needs the matching change there.
         * <p>
         * NOTE(review): the index pairing was checked against the leading
         * part of {@code blocks} only, and the code that searches this table
         * was not inspected -- confirm both before reordering entries.
         */
        private static final int[] blockStarts = {
            0x0000,  // 0000..007F; Basic Latin
            0x0080,  // 0080..00FF; Latin-1 Supplement
            0x0100,  // 0100..017F; Latin Extended-A
            0x0180,  // 0180..024F; Latin Extended-B
            0x0250,  // 0250..02AF; IPA Extensions
            0x02B0,  // 02B0..02FF; Spacing Modifier Letters
            0x0300,  // 0300..036F; Combining Diacritical Marks
            0x0370,  // 0370..03FF; Greek and Coptic
            0x0400,  // 0400..04FF; Cyrillic
            0x0500,  // 0500..052F; Cyrillic Supplement
            0x0530,  // 0530..058F; Armenian
            0x0590,  // 0590..05FF; Hebrew
            0x0600,  // 0600..06FF; Arabic
            0x0700,  // 0700..074F; Syriac
            0x0750,  // 0750..077F; Arabic Supplement
            0x0780,  // 0780..07BF; Thaana
            0x07C0,  // 07C0..07FF; NKo
            0x0800,  // 0800..083F; Samaritan
            0x0840,  // 0840..085F; Mandaic
            0x0860,  // 0860..086F; Syriac Supplement
            0x0870,  // 0870..089F; Arabic Extended-B
            0x08A0,  // 08A0..08FF; Arabic Extended-A
            0x0900,  // 0900..097F; Devanagari
            0x0980,  // 0980..09FF; Bengali
            0x0A00,  // 0A00..0A7F; Gurmukhi
            0x0A80,  // 0A80..0AFF; Gujarati
            0x0B00,  // 0B00..0B7F; Oriya
            0x0B80,  // 0B80..0BFF; Tamil
            0x0C00,  // 0C00..0C7F; Telugu
            0x0C80,  // 0C80..0CFF; Kannada
            0x0D00,  // 0D00..0D7F; Malayalam
            0x0D80,  // 0D80..0DFF; Sinhala
            0x0E00,  // 0E00..0E7F; Thai
            0x0E80,  // 0E80..0EFF; Lao
            0x0F00,  // 0F00..0FFF; Tibetan
            0x1000,  // 1000..109F; Myanmar
            0x10A0,  // 10A0..10FF; Georgian
            0x1100,  // 1100..11FF; Hangul Jamo
            0x1200,  // 1200..137F; Ethiopic
            0x1380,  // 1380..139F; Ethiopic Supplement
            0x13A0,  // 13A0..13FF; Cherokee
            0x1400,  // 1400..167F; Unified Canadian Aboriginal Syllabics
0x1680, // 1680..169F; Ogham 3845 0x16A0, // 16A0..16FF; Runic 3846 0x1700, // 1700..171F; Tagalog 3847 0x1720, // 1720..173F; Hanunoo 3848 0x1740, // 1740..175F; Buhid 3849 0x1760, // 1760..177F; Tagbanwa 3850 0x1780, // 1780..17FF; Khmer 3851 0x1800, // 1800..18AF; Mongolian 3852 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 3853 0x1900, // 1900..194F; Limbu 3854 0x1950, // 1950..197F; Tai Le 3855 0x1980, // 1980..19DF; New Tai Lue 3856 0x19E0, // 19E0..19FF; Khmer Symbols 3857 0x1A00, // 1A00..1A1F; Buginese 3858 0x1A20, // 1A20..1AAF; Tai Tham 3859 0x1AB0, // 1AB0..1AFF; Combining Diacritical Marks Extended 3860 0x1B00, // 1B00..1B7F; Balinese 3861 0x1B80, // 1B80..1BBF; Sundanese 3862 0x1BC0, // 1BC0..1BFF; Batak 3863 0x1C00, // 1C00..1C4F; Lepcha 3864 0x1C50, // 1C50..1C7F; Ol Chiki 3865 0x1C80, // 1C80..1C8F; Cyrillic Extended-C 3866 0x1C90, // 1C90..1CBF; Georgian Extended 3867 0x1CC0, // 1CC0..1CCF; Sundanese Supplement 3868 0x1CD0, // 1CD0..1CFF; Vedic Extensions 3869 0x1D00, // 1D00..1D7F; Phonetic Extensions 3870 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 3871 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 3872 0x1E00, // 1E00..1EFF; Latin Extended Additional 3873 0x1F00, // 1F00..1FFF; Greek Extended 3874 0x2000, // 2000..206F; General Punctuation 3875 0x2070, // 2070..209F; Superscripts and Subscripts 3876 0x20A0, // 20A0..20CF; Currency Symbols 3877 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols 3878 0x2100, // 2100..214F; Letterlike Symbols 3879 0x2150, // 2150..218F; Number Forms 3880 0x2190, // 2190..21FF; Arrows 3881 0x2200, // 2200..22FF; Mathematical Operators 3882 0x2300, // 2300..23FF; Miscellaneous Technical 3883 0x2400, // 2400..243F; Control Pictures 3884 0x2440, // 2440..245F; Optical Character Recognition 3885 0x2460, // 2460..24FF; Enclosed Alphanumerics 3886 0x2500, // 2500..257F; Box Drawing 3887 0x2580, // 2580..259F; Block Elements 3888 0x25A0, // 25A0..25FF; 
Geometric Shapes 3889 0x2600, // 2600..26FF; Miscellaneous Symbols 3890 0x2700, // 2700..27BF; Dingbats 3891 0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A 3892 0x27F0, // 27F0..27FF; Supplemental Arrows-A 3893 0x2800, // 2800..28FF; Braille Patterns 3894 0x2900, // 2900..297F; Supplemental Arrows-B 3895 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B 3896 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators 3897 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows 3898 0x2C00, // 2C00..2C5F; Glagolitic 3899 0x2C60, // 2C60..2C7F; Latin Extended-C 3900 0x2C80, // 2C80..2CFF; Coptic 3901 0x2D00, // 2D00..2D2F; Georgian Supplement 3902 0x2D30, // 2D30..2D7F; Tifinagh 3903 0x2D80, // 2D80..2DDF; Ethiopic Extended 3904 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A 3905 0x2E00, // 2E00..2E7F; Supplemental Punctuation 3906 0x2E80, // 2E80..2EFF; CJK Radicals Supplement 3907 0x2F00, // 2F00..2FDF; Kangxi Radicals 3908 0x2FE0, // unassigned 3909 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters 3910 0x3000, // 3000..303F; CJK Symbols and Punctuation 3911 0x3040, // 3040..309F; Hiragana 3912 0x30A0, // 30A0..30FF; Katakana 3913 0x3100, // 3100..312F; Bopomofo 3914 0x3130, // 3130..318F; Hangul Compatibility Jamo 3915 0x3190, // 3190..319F; Kanbun 3916 0x31A0, // 31A0..31BF; Bopomofo Extended 3917 0x31C0, // 31C0..31EF; CJK Strokes 3918 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions 3919 0x3200, // 3200..32FF; Enclosed CJK Letters and Months 3920 0x3300, // 3300..33FF; CJK Compatibility 3921 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 3922 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols 3923 0x4E00, // 4E00..9FFF; CJK Unified Ideographs 3924 0xA000, // A000..A48F; Yi Syllables 3925 0xA490, // A490..A4CF; Yi Radicals 3926 0xA4D0, // A4D0..A4FF; Lisu 3927 0xA500, // A500..A63F; Vai 3928 0xA640, // A640..A69F; Cyrillic Extended-B 3929 0xA6A0, // A6A0..A6FF; Bamum 3930 0xA700, // A700..A71F; Modifier Tone Letters 3931 
0xA720, // A720..A7FF; Latin Extended-D 3932 0xA800, // A800..A82F; Syloti Nagri 3933 0xA830, // A830..A83F; Common Indic Number Forms 3934 0xA840, // A840..A87F; Phags-pa 3935 0xA880, // A880..A8DF; Saurashtra 3936 0xA8E0, // A8E0..A8FF; Devanagari Extended 3937 0xA900, // A900..A92F; Kayah Li 3938 0xA930, // A930..A95F; Rejang 3939 0xA960, // A960..A97F; Hangul Jamo Extended-A 3940 0xA980, // A980..A9DF; Javanese 3941 0xA9E0, // A9E0..A9FF; Myanmar Extended-B 3942 0xAA00, // AA00..AA5F; Cham 3943 0xAA60, // AA60..AA7F; Myanmar Extended-A 3944 0xAA80, // AA80..AADF; Tai Viet 3945 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions 3946 0xAB00, // AB00..AB2F; Ethiopic Extended-A 3947 0xAB30, // AB30..AB6F; Latin Extended-E 3948 0xAB70, // AB70..ABBF; Cherokee Supplement 3949 0xABC0, // ABC0..ABFF; Meetei Mayek 3950 0xAC00, // AC00..D7AF; Hangul Syllables 3951 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B 3952 0xD800, // D800..DB7F; High Surrogates 3953 0xDB80, // DB80..DBFF; High Private Use Surrogates 3954 0xDC00, // DC00..DFFF; Low Surrogates 3955 0xE000, // E000..F8FF; Private Use Area 3956 0xF900, // F900..FAFF; CJK Compatibility Ideographs 3957 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms 3958 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A 3959 0xFE00, // FE00..FE0F; Variation Selectors 3960 0xFE10, // FE10..FE1F; Vertical Forms 3961 0xFE20, // FE20..FE2F; Combining Half Marks 3962 0xFE30, // FE30..FE4F; CJK Compatibility Forms 3963 0xFE50, // FE50..FE6F; Small Form Variants 3964 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B 3965 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms 3966 0xFFF0, // FFF0..FFFF; Specials 3967 0x10000, // 10000..1007F; Linear B Syllabary 3968 0x10080, // 10080..100FF; Linear B Ideograms 3969 0x10100, // 10100..1013F; Aegean Numbers 3970 0x10140, // 10140..1018F; Ancient Greek Numbers 3971 0x10190, // 10190..101CF; Ancient Symbols 3972 0x101D0, // 101D0..101FF; Phaistos Disc 3973 0x10200, // unassigned 3974 0x10280, // 
10280..1029F; Lycian 3975 0x102A0, // 102A0..102DF; Carian 3976 0x102E0, // 102E0..102FF; Coptic Epact Numbers 3977 0x10300, // 10300..1032F; Old Italic 3978 0x10330, // 10330..1034F; Gothic 3979 0x10350, // 10350..1037F; Old Permic 3980 0x10380, // 10380..1039F; Ugaritic 3981 0x103A0, // 103A0..103DF; Old Persian 3982 0x103E0, // unassigned 3983 0x10400, // 10400..1044F; Deseret 3984 0x10450, // 10450..1047F; Shavian 3985 0x10480, // 10480..104AF; Osmanya 3986 0x104B0, // 104B0..104FF; Osage 3987 0x10500, // 10500..1052F; Elbasan 3988 0x10530, // 10530..1056F; Caucasian Albanian 3989 0x10570, // 10570..105BF; Vithkuqi 3990 0x105C0, // 105C0..105FF; Todhri 3991 0x10600, // 10600..1077F; Linear A 3992 0x10780, // 10780..107BF; Latin Extended-F 3993 0x107C0, // unassigned 3994 0x10800, // 10800..1083F; Cypriot Syllabary 3995 0x10840, // 10840..1085F; Imperial Aramaic 3996 0x10860, // 10860..1087F; Palmyrene 3997 0x10880, // 10880..108AF; Nabataean 3998 0x108B0, // unassigned 3999 0x108E0, // 108E0..108FF; Hatran 4000 0x10900, // 10900..1091F; Phoenician 4001 0x10920, // 10920..1093F; Lydian 4002 0x10940, // 10940..1095F; Sidetic 4003 0x10960, // unassigned 4004 0x10980, // 10980..1099F; Meroitic Hieroglyphs 4005 0x109A0, // 109A0..109FF; Meroitic Cursive 4006 0x10A00, // 10A00..10A5F; Kharoshthi 4007 0x10A60, // 10A60..10A7F; Old South Arabian 4008 0x10A80, // 10A80..10A9F; Old North Arabian 4009 0x10AA0, // unassigned 4010 0x10AC0, // 10AC0..10AFF; Manichaean 4011 0x10B00, // 10B00..10B3F; Avestan 4012 0x10B40, // 10B40..10B5F; Inscriptional Parthian 4013 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi 4014 0x10B80, // 10B80..10BAF; Psalter Pahlavi 4015 0x10BB0, // unassigned 4016 0x10C00, // 10C00..10C4F; Old Turkic 4017 0x10C50, // unassigned 4018 0x10C80, // 10C80..10CFF; Old Hungarian 4019 0x10D00, // 10D00..10D3F; Hanifi Rohingya 4020 0x10D40, // 10D40..10D8F; Garay 4021 0x10D90, // unassigned 4022 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols 4023 0x10E80, // 
10E80..10EBF; Yezidi 4024 0x10EC0, // 10EC0..10EFF; Arabic Extended-C 4025 0x10F00, // 10F00..10F2F; Old Sogdian 4026 0x10F30, // 10F30..10F6F; Sogdian 4027 0x10F70, // 10F70..10FAF; Old Uyghur 4028 0x10FB0, // 10FB0..10FDF; Chorasmian 4029 0x10FE0, // 10FE0..10FFF; Elymaic 4030 0x11000, // 11000..1107F; Brahmi 4031 0x11080, // 11080..110CF; Kaithi 4032 0x110D0, // 110D0..110FF; Sora Sompeng 4033 0x11100, // 11100..1114F; Chakma 4034 0x11150, // 11150..1117F; Mahajani 4035 0x11180, // 11180..111DF; Sharada 4036 0x111E0, // 111E0..111FF; Sinhala Archaic Numbers 4037 0x11200, // 11200..1124F; Khojki 4038 0x11250, // unassigned 4039 0x11280, // 11280..112AF; Multani 4040 0x112B0, // 112B0..112FF; Khudawadi 4041 0x11300, // 11300..1137F; Grantha 4042 0x11380, // 11380..113FF; Tulu-Tigalari 4043 0x11400, // 11400..1147F; Newa 4044 0x11480, // 11480..114DF; Tirhuta 4045 0x114E0, // unassigned 4046 0x11580, // 11580..115FF; Siddham 4047 0x11600, // 11600..1165F; Modi 4048 0x11660, // 11660..1167F; Mongolian Supplement 4049 0x11680, // 11680..116CF; Takri 4050 0x116D0, // 116D0..116FF; Myanmar Extended-C 4051 0x11700, // 11700..1174F; Ahom 4052 0x11750, // unassigned 4053 0x11800, // 11800..1184F; Dogra 4054 0x11850, // unassigned 4055 0x118A0, // 118A0..118FF; Warang Citi 4056 0x11900, // 11900..1195F; Dives Akuru 4057 0x11960, // unassigned 4058 0x119A0, // 119A0..119FF; Nandinagari 4059 0x11A00, // 11A00..11A4F; Zanabazar Square 4060 0x11A50, // 11A50..11AAF; Soyombo 4061 0x11AB0, // 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A 4062 0x11AC0, // 11AC0..11AFF; Pau Cin Hau 4063 0x11B00, // 11B00..11B5F; Devanagari Extended-A 4064 0x11B60, // 11B60..11B7F; Sharada Supplement 4065 0x11B80, // unassigned 4066 0x11BC0, // 11BC0..11BFF; Sunuwar 4067 0x11C00, // 11C00..11C6F; Bhaiksuki 4068 0x11C70, // 11C70..11CBF; Marchen 4069 0x11CC0, // unassigned 4070 0x11D00, // 11D00..11D5F; Masaram Gondi 4071 0x11D60, // 11D60..11DAF; Gunjala Gondi 4072 0x11DB0, // 
11DB0..11DEF; Tolong Siki 4073 0x11DF0, // unassigned 4074 0x11EE0, // 11EE0..11EFF; Makasar 4075 0x11F00, // 11F00..11F5F; Kawi 4076 0x11F60, // unassigned 4077 0x11FB0, // 11FB0..11FBF; Lisu Supplement 4078 0x11FC0, // 11FC0..11FFF; Tamil Supplement 4079 0x12000, // 12000..123FF; Cuneiform 4080 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation 4081 0x12480, // 12480..1254F; Early Dynastic Cuneiform 4082 0x12550, // unassigned 4083 0x12F90, // 12F90..12FFF; Cypro-Minoan 4084 0x13000, // 13000..1342F; Egyptian Hieroglyphs 4085 0x13430, // 13430..1345F; Egyptian Hieroglyph Format Controls 4086 0x13460, // 13460..143FF; Egyptian Hieroglyphs Extended-A 4087 0x14400, // 14400..1467F; Anatolian Hieroglyphs 4088 0x14680, // unassigned 4089 0x16100, // 16100..1613F; Gurung Khema 4090 0x16140, // unassigned 4091 0x16800, // 16800..16A3F; Bamum Supplement 4092 0x16A40, // 16A40..16A6F; Mro 4093 0x16A70, // 16A70..16ACF; Tangsa 4094 0x16AD0, // 16AD0..16AFF; Bassa Vah 4095 0x16B00, // 16B00..16B8F; Pahawh Hmong 4096 0x16B90, // unassigned 4097 0x16D40, // 16D40..16D7F; Kirat Rai 4098 0x16D80, // unassigned 4099 0x16E40, // 16E40..16E9F; Medefaidrin 4100 0x16EA0, // 16EA0..16EDF; Beria Erfe 4101 0x16EE0, // unassigned 4102 0x16F00, // 16F00..16F9F; Miao 4103 0x16FA0, // unassigned 4104 0x16FE0, // 16FE0..16FFF; Ideographic Symbols and Punctuation 4105 0x17000, // 17000..187FF; Tangut 4106 0x18800, // 18800..18AFF; Tangut Components 4107 0x18B00, // 18B00..18CFF; Khitan Small Script 4108 0x18D00, // 18D00..18D7F; Tangut Supplement 4109 0x18D80, // 18D80..18DFF; Tangut Components Supplement 4110 0x18E00, // unassigned 4111 0x1AFF0, // 1AFF0..1AFFF; Kana Extended-B 4112 0x1B000, // 1B000..1B0FF; Kana Supplement 4113 0x1B100, // 1B100..1B12F; Kana Extended-A 4114 0x1B130, // 1B130..1B16F; Small Kana Extension 4115 0x1B170, // 1B170..1B2FF; Nushu 4116 0x1B300, // unassigned 4117 0x1BC00, // 1BC00..1BC9F; Duployan 4118 0x1BCA0, // 1BCA0..1BCAF; Shorthand Format Controls 
4119 0x1BCB0, // unassigned 4120 0x1CC00, // 1CC00..1CEBF; Symbols for Legacy Computing Supplement 4121 0x1CEC0, // 1CEC0..1CEFF; Miscellaneous Symbols Supplement 4122 0x1CF00, // 1CF00..1CFCF; Znamenny Musical Notation 4123 0x1CFD0, // unassigned 4124 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 4125 0x1D100, // 1D100..1D1FF; Musical Symbols 4126 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation 4127 0x1D250, // unassigned 4128 0x1D2C0, // 1D2C0..1D2DF; Kaktovik Numerals 4129 0x1D2E0, // 1D2E0..1D2FF; Mayan Numerals 4130 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols 4131 0x1D360, // 1D360..1D37F; Counting Rod Numerals 4132 0x1D380, // unassigned 4133 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols 4134 0x1D800, // 1D800..1DAAF; Sutton SignWriting 4135 0x1DAB0, // unassigned 4136 0x1DF00, // 1DF00..1DFFF; Latin Extended-G 4137 0x1E000, // 1E000..1E02F; Glagolitic Supplement 4138 0x1E030, // 1E030..1E08F; Cyrillic Extended-D 4139 0x1E090, // unassigned 4140 0x1E100, // 1E100..1E14F; Nyiakeng Puachue Hmong 4141 0x1E150, // unassigned 4142 0x1E290, // 1E290..1E2BF; Toto 4143 0x1E2C0, // 1E2C0..1E2FF; Wancho 4144 0x1E300, // unassigned 4145 0x1E4D0, // 1E4D0..1E4FF; Nag Mundari 4146 0x1E500, // unassigned 4147 0x1E5D0, // 1E5D0..1E5FF; Ol Onal 4148 0x1E600, // unassigned 4149 0x1E6C0, // 1E6C0..1E6FF; Tai Yo 4150 0x1E700, // unassigned 4151 0x1E7E0, // 1E7E0..1E7FF; Ethiopic Extended-B 4152 0x1E800, // 1E800..1E8DF; Mende Kikakui 4153 0x1E8E0, // unassigned 4154 0x1E900, // 1E900..1E95F; Adlam 4155 0x1E960, // unassigned 4156 0x1EC70, // 1EC70..1ECBF; Indic Siyaq Numbers 4157 0x1ECC0, // unassigned 4158 0x1ED00, // 1ED00..1ED4F; Ottoman Siyaq Numbers 4159 0x1ED50, // unassigned 4160 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 4161 0x1EF00, // unassigned 4162 0x1F000, // 1F000..1F02F; Mahjong Tiles 4163 0x1F030, // 1F030..1F09F; Domino Tiles 4164 0x1F0A0, // 1F0A0..1F0FF; Playing Cards 4165 0x1F100, // 1F100..1F1FF; 
// Enclosed Alphanumeric Supplement
            0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement
            0x1F300, // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
            0x1F600, // 1F600..1F64F; Emoticons
            0x1F650, // 1F650..1F67F; Ornamental Dingbats
            0x1F680, // 1F680..1F6FF; Transport and Map Symbols
            0x1F700, // 1F700..1F77F; Alchemical Symbols
            0x1F780, // 1F780..1F7FF; Geometric Shapes Extended
            0x1F800, // 1F800..1F8FF; Supplemental Arrows-C
            0x1F900, // 1F900..1F9FF; Supplemental Symbols and Pictographs
            0x1FA00, // 1FA00..1FA6F; Chess Symbols
            0x1FA70, // 1FA70..1FAFF; Symbols and Pictographs Extended-A
            0x1FB00, // 1FB00..1FBFF; Symbols for Legacy Computing
            0x1FC00, //               unassigned
            0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B
            0x2A6E0, //               unassigned
            0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C
            0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D
            0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E
            0x2CEB0, // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
            0x2EBF0, // 2EBF0..2EE5F; CJK Unified Ideographs Extension I
            0x2EE60, //               unassigned
            0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
            0x2FA20, //               unassigned
            0x30000, // 30000..3134F; CJK Unified Ideographs Extension G
            0x31350, // 31350..323AF; CJK Unified Ideographs Extension H
            0x323B0, // 323B0..3347F; CJK Unified Ideographs Extension J
            0x33480, //               unassigned
            0xE0000, // E0000..E007F; Tags
            0xE0080, //               unassigned
            0xE0100, // E0100..E01EF; Variation Selectors Supplement
            0xE01F0, //               unassigned
            0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A
            0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
        };

        // Result table for of(int): the index found by searching blockStarts
        // is used directly as an index into this array, so the two arrays must
        // stay index-aligned. blocks[i] is the block whose range starts at
        // blockStarts[i]; a null entry corresponds to a range that blockStarts
        // labels "unassigned", i.e. code points that belong to no block.
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            SYRIAC_SUPPLEMENT,
            ARABIC_EXTENDED_B,
            ARABIC_EXTENDED_A,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            TAI_THAM,
            COMBINING_DIACRITICAL_MARKS_EXTENDED,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            CYRILLIC_EXTENDED_C,
            GEORGIAN_EXTENDED,
            SUNDANESE_SUPPLEMENT,
            VEDIC_EXTENSIONS,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
            LATIN_EXTENDED_ADDITIONAL,
            GREEK_EXTENDED,
            GENERAL_PUNCTUATION,
            SUPERSCRIPTS_AND_SUBSCRIPTS,
            CURRENCY_SYMBOLS,
            COMBINING_MARKS_FOR_SYMBOLS,
            LETTERLIKE_SYMBOLS,
            NUMBER_FORMS,
            ARROWS,
            MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_TECHNICAL,
            CONTROL_PICTURES,
            OPTICAL_CHARACTER_RECOGNITION,
            ENCLOSED_ALPHANUMERICS,
            BOX_DRAWING,
            BLOCK_ELEMENTS,
            GEOMETRIC_SHAPES,
            MISCELLANEOUS_SYMBOLS,
            DINGBATS,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
            SUPPLEMENTAL_ARROWS_A,
            BRAILLE_PATTERNS,
            SUPPLEMENTAL_ARROWS_B,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
            GLAGOLITIC,
            LATIN_EXTENDED_C,
            COPTIC,
            GEORGIAN_SUPPLEMENT,
            TIFINAGH,
            ETHIOPIC_EXTENDED,
            CYRILLIC_EXTENDED_A,
            SUPPLEMENTAL_PUNCTUATION,
            CJK_RADICALS_SUPPLEMENT,
            KANGXI_RADICALS,
            null,
            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
            CJK_SYMBOLS_AND_PUNCTUATION,
            HIRAGANA,
            KATAKANA,
            BOPOMOFO,
            HANGUL_COMPATIBILITY_JAMO,
            KANBUN,
            BOPOMOFO_EXTENDED,
            CJK_STROKES,
            KATAKANA_PHONETIC_EXTENSIONS,
            ENCLOSED_CJK_LETTERS_AND_MONTHS,
            CJK_COMPATIBILITY,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
            YIJING_HEXAGRAM_SYMBOLS,
            CJK_UNIFIED_IDEOGRAPHS,
            YI_SYLLABLES,
            YI_RADICALS,
            LISU,
            VAI,
            CYRILLIC_EXTENDED_B,
            BAMUM,
            MODIFIER_TONE_LETTERS,
            LATIN_EXTENDED_D,
            SYLOTI_NAGRI,
            COMMON_INDIC_NUMBER_FORMS,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI_EXTENDED,
            KAYAH_LI,
            REJANG,
            HANGUL_JAMO_EXTENDED_A,
            JAVANESE,
            MYANMAR_EXTENDED_B,
            CHAM,
            MYANMAR_EXTENDED_A,
            TAI_VIET,
            MEETEI_MAYEK_EXTENSIONS,
            ETHIOPIC_EXTENDED_A,
            LATIN_EXTENDED_E,
            CHEROKEE_SUPPLEMENT,
            MEETEI_MAYEK,
            HANGUL_SYLLABLES,
            HANGUL_JAMO_EXTENDED_B,
            HIGH_SURROGATES,
            HIGH_PRIVATE_USE_SURROGATES,
            LOW_SURROGATES,
            PRIVATE_USE_AREA,
            CJK_COMPATIBILITY_IDEOGRAPHS,
            ALPHABETIC_PRESENTATION_FORMS,
            ARABIC_PRESENTATION_FORMS_A,
            VARIATION_SELECTORS,
            VERTICAL_FORMS,
            COMBINING_HALF_MARKS,
            CJK_COMPATIBILITY_FORMS,
            SMALL_FORM_VARIANTS,
            ARABIC_PRESENTATION_FORMS_B,
            HALFWIDTH_AND_FULLWIDTH_FORMS,
            SPECIALS,
            LINEAR_B_SYLLABARY,
            LINEAR_B_IDEOGRAMS,
            AEGEAN_NUMBERS,
            ANCIENT_GREEK_NUMBERS,
            ANCIENT_SYMBOLS,
            PHAISTOS_DISC,
            null,
            LYCIAN,
            CARIAN,
            COPTIC_EPACT_NUMBERS,
            OLD_ITALIC,
            GOTHIC,
            OLD_PERMIC,
            UGARITIC,
            OLD_PERSIAN,
            null,
            DESERET,
            SHAVIAN,
            OSMANYA,
            OSAGE,
            ELBASAN,
            CAUCASIAN_ALBANIAN,
            VITHKUQI,
            TODHRI,
            LINEAR_A,
            LATIN_EXTENDED_F,
            null,
            CYPRIOT_SYLLABARY,
            IMPERIAL_ARAMAIC,
            PALMYRENE,
            NABATAEAN,
            null,
            HATRAN,
            PHOENICIAN,
            LYDIAN,
            SIDETIC,
            null,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            OLD_NORTH_ARABIAN,
            null,
            MANICHAEAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            PSALTER_PAHLAVI,
            null,
            OLD_TURKIC,
            null,
            OLD_HUNGARIAN,
            HANIFI_ROHINGYA,
            GARAY,
            null,
            RUMI_NUMERAL_SYMBOLS,
            YEZIDI,
            ARABIC_EXTENDED_C,
            OLD_SOGDIAN,
            SOGDIAN,
            OLD_UYGHUR,
            CHORASMIAN,
            ELYMAIC,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            MAHAJANI,
            SHARADA,
            SINHALA_ARCHAIC_NUMBERS,
            KHOJKI,
            null,
            MULTANI,
            KHUDAWADI,
            GRANTHA,
            TULU_TIGALARI,
            NEWA,
            TIRHUTA,
            null,
            SIDDHAM,
            MODI,
            MONGOLIAN_SUPPLEMENT,
            TAKRI,
            MYANMAR_EXTENDED_C,
            AHOM,
            null,
            DOGRA,
            null,
            WARANG_CITI,
            DIVES_AKURU,
            null,
            NANDINAGARI,
            ZANABAZAR_SQUARE,
            SOYOMBO,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A,
            PAU_CIN_HAU,
            DEVANAGARI_EXTENDED_A,
            SHARADA_SUPPLEMENT,
            null,
            SUNUWAR,
            BHAIKSUKI,
            MARCHEN,
            null,
            MASARAM_GONDI,
            GUNJALA_GONDI,
            TOLONG_SIKI,
            null,
            MAKASAR,
            KAWI,
            null,
            LISU_SUPPLEMENT,
            TAMIL_SUPPLEMENT,
            CUNEIFORM,
            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
            EARLY_DYNASTIC_CUNEIFORM,
            null,
            CYPRO_MINOAN,
            EGYPTIAN_HIEROGLYPHS,
            EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS,
            EGYPTIAN_HIEROGLYPHS_EXTENDED_A,
            ANATOLIAN_HIEROGLYPHS,
            null,
            GURUNG_KHEMA,
            null,
            BAMUM_SUPPLEMENT,
            MRO,
            TANGSA,
            BASSA_VAH,
            PAHAWH_HMONG,
            null,
            KIRAT_RAI,
            null,
            MEDEFAIDRIN,
            BERIA_ERFE,
            null,
            MIAO,
            null,
            IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
            TANGUT,
            TANGUT_COMPONENTS,
            KHITAN_SMALL_SCRIPT,
            TANGUT_SUPPLEMENT,
            TANGUT_COMPONENTS_SUPPLEMENT,
            null,
            KANA_EXTENDED_B,
            KANA_SUPPLEMENT,
            KANA_EXTENDED_A,
            SMALL_KANA_EXTENSION,
            NUSHU,
            null,
            DUPLOYAN,
            SHORTHAND_FORMAT_CONTROLS,
            null,
            SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT,
            MISCELLANEOUS_SYMBOLS_SUPPLEMENT,
            ZNAMENNY_MUSICAL_NOTATION,
            null,
            BYZANTINE_MUSICAL_SYMBOLS,
            MUSICAL_SYMBOLS,
            ANCIENT_GREEK_MUSICAL_NOTATION,
            null,
            KAKTOVIK_NUMERALS,
            MAYAN_NUMERALS,
            TAI_XUAN_JING_SYMBOLS,
            COUNTING_ROD_NUMERALS,
            null,
            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
            SUTTON_SIGNWRITING,
            null,
            LATIN_EXTENDED_G,
            GLAGOLITIC_SUPPLEMENT,
            CYRILLIC_EXTENDED_D,
            null,
            NYIAKENG_PUACHUE_HMONG,
            null,
            TOTO,
            WANCHO,
            null,
            NAG_MUNDARI,
            null,
            OL_ONAL,
            null,
            TAI_YO,
            null,
            ETHIOPIC_EXTENDED_B,
            MENDE_KIKAKUI,
            null,
            ADLAM,
            null,
            INDIC_SIYAQ_NUMBERS,
            null,
            OTTOMAN_SIYAQ_NUMBERS,
            null,
            ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
            null,
            MAHJONG_TILES,
            DOMINO_TILES,
            PLAYING_CARDS,
            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
            EMOTICONS,
            ORNAMENTAL_DINGBATS,
            TRANSPORT_AND_MAP_SYMBOLS,
            ALCHEMICAL_SYMBOLS,
            GEOMETRIC_SHAPES_EXTENDED,
            SUPPLEMENTAL_ARROWS_C,
            SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
            CHESS_SYMBOLS,
            SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A,
            SYMBOLS_FOR_LEGACY_COMPUTING,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I,
            null,
CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 4588 null, 4589 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G, 4590 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H, 4591 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_J, 4592 null, 4593 TAGS, 4594 null, 4595 VARIATION_SELECTORS_SUPPLEMENT, 4596 null, 4597 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 4598 SUPPLEMENTARY_PRIVATE_USE_AREA_B, 4599 }; 4600 4601 4602 /** 4603 * Returns the object representing the Unicode block containing the 4604 * given character, or {@code null} if the character is not a 4605 * member of a defined block. 4606 * 4607 * <p><b>Note:</b> This method cannot handle 4608 * <a href="Character.html#supplementary"> supplementary 4609 * characters</a>. To support all Unicode characters, including 4610 * supplementary characters, use the {@link #of(int)} method. 4611 * 4612 * @param c The character in question 4613 * @return The {@code UnicodeBlock} instance representing the 4614 * Unicode block of which this character is a member, or 4615 * {@code null} if the character is not a member of any 4616 * Unicode block 4617 */ 4618 public static UnicodeBlock of(char c) { 4619 return of((int)c); 4620 } 4621 4622 /** 4623 * Returns the object representing the Unicode block 4624 * containing the given character (Unicode code point), or 4625 * {@code null} if the character is not a member of a 4626 * defined block. 4627 * 4628 * @param codePoint the character (Unicode code point) in question. 4629 * @return The {@code UnicodeBlock} instance representing the 4630 * Unicode block of which this character is a member, or 4631 * {@code null} if the character is not a member of any 4632 * Unicode block 4633 * @throws IllegalArgumentException if the specified 4634 * {@code codePoint} is an invalid Unicode code point. 
4635 * @see Character#isValidCodePoint(int) 4636 * @since 1.5 4637 */ 4638 public static UnicodeBlock of(int codePoint) { 4639 if (!isValidCodePoint(codePoint)) { 4640 throw new IllegalArgumentException( 4641 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 4642 } 4643 4644 int top, bottom, current; 4645 bottom = 0; 4646 top = blockStarts.length; 4647 current = top/2; 4648 4649 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 4650 while (top - bottom > 1) { 4651 if (codePoint >= blockStarts[current]) { 4652 bottom = current; 4653 } else { 4654 top = current; 4655 } 4656 current = (top + bottom) / 2; 4657 } 4658 return blocks[current]; 4659 } 4660 4661 /** 4662 * Returns the UnicodeBlock with the given name. Block 4663 * names are determined by The Unicode Standard. The file 4664 * {@code Blocks.txt} defines blocks for a particular 4665 * version of the standard. The {@link Character} class specifies 4666 * the version of the standard that it supports. 4667 * <p> 4668 * This method accepts block names in the following forms: 4669 * <ol> 4670 * <li> Canonical block names as defined by the Unicode Standard. 4671 * For example, the standard defines a "Basic Latin" block. Therefore, this 4672 * method accepts "Basic Latin" as a valid block name. The documentation of 4673 * each UnicodeBlock provides the canonical name. 4674 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 4675 * is a valid block name for the "Basic Latin" block. 4676 * <li>The text representation of each constant UnicodeBlock identifier. 4677 * For example, this method will return the {@link #BASIC_LATIN} block if 4678 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 4679 * hyphens in the canonical name with underscores. 4680 * </ol> 4681 * Finally, character case is ignored for all of the valid block name forms. 4682 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 
4683 * The en_US locale's case mapping rules are used to provide case-insensitive 4684 * string comparisons for block name validation. 4685 * <p> 4686 * If the Unicode Standard changes block names, both the previous and 4687 * current names will be accepted. 4688 * 4689 * @param blockName A {@code UnicodeBlock} name. 4690 * @return The {@code UnicodeBlock} instance identified 4691 * by {@code blockName} 4692 * @throws IllegalArgumentException if {@code blockName} is an 4693 * invalid name 4694 * @throws NullPointerException if {@code blockName} is null 4695 * @since 1.5 4696 */ 4697 public static final UnicodeBlock forName(String blockName) { 4698 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 4699 if (block == null) { 4700 throw new IllegalArgumentException("Not a valid block name: " 4701 + blockName); 4702 } 4703 return block; 4704 } 4705 } 4706 4707 4708 /** 4709 * A family of character subsets representing the character scripts 4710 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 4711 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 4712 * character is assigned to a single Unicode script, either a specific 4713 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 4714 * one of the following three special values, 4715 * {@link Character.UnicodeScript#INHERITED Inherited}, 4716 * {@link Character.UnicodeScript#COMMON Common} or 4717 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 4718 * 4719 * @spec https://www.unicode.org/reports/tr24 Unicode Script Property 4720 * @since 1.7 4721 */ 4722 public static enum UnicodeScript { 4723 4724 /** 4725 * Unicode script "Common". 4726 */ 4727 COMMON, 4728 4729 /** 4730 * Unicode script "Latin". 4731 */ 4732 LATIN, 4733 4734 /** 4735 * Unicode script "Greek". 4736 */ 4737 GREEK, 4738 4739 /** 4740 * Unicode script "Cyrillic". 4741 */ 4742 CYRILLIC, 4743 4744 /** 4745 * Unicode script "Armenian". 
4746 */ 4747 ARMENIAN, 4748 4749 /** 4750 * Unicode script "Hebrew". 4751 */ 4752 HEBREW, 4753 4754 /** 4755 * Unicode script "Arabic". 4756 */ 4757 ARABIC, 4758 4759 /** 4760 * Unicode script "Syriac". 4761 */ 4762 SYRIAC, 4763 4764 /** 4765 * Unicode script "Thaana". 4766 */ 4767 THAANA, 4768 4769 /** 4770 * Unicode script "Devanagari". 4771 */ 4772 DEVANAGARI, 4773 4774 /** 4775 * Unicode script "Bengali". 4776 */ 4777 BENGALI, 4778 4779 /** 4780 * Unicode script "Gurmukhi". 4781 */ 4782 GURMUKHI, 4783 4784 /** 4785 * Unicode script "Gujarati". 4786 */ 4787 GUJARATI, 4788 4789 /** 4790 * Unicode script "Oriya". 4791 */ 4792 ORIYA, 4793 4794 /** 4795 * Unicode script "Tamil". 4796 */ 4797 TAMIL, 4798 4799 /** 4800 * Unicode script "Telugu". 4801 */ 4802 TELUGU, 4803 4804 /** 4805 * Unicode script "Kannada". 4806 */ 4807 KANNADA, 4808 4809 /** 4810 * Unicode script "Malayalam". 4811 */ 4812 MALAYALAM, 4813 4814 /** 4815 * Unicode script "Sinhala". 4816 */ 4817 SINHALA, 4818 4819 /** 4820 * Unicode script "Thai". 4821 */ 4822 THAI, 4823 4824 /** 4825 * Unicode script "Lao". 4826 */ 4827 LAO, 4828 4829 /** 4830 * Unicode script "Tibetan". 4831 */ 4832 TIBETAN, 4833 4834 /** 4835 * Unicode script "Myanmar". 4836 */ 4837 MYANMAR, 4838 4839 /** 4840 * Unicode script "Georgian". 4841 */ 4842 GEORGIAN, 4843 4844 /** 4845 * Unicode script "Hangul". 4846 */ 4847 HANGUL, 4848 4849 /** 4850 * Unicode script "Ethiopic". 4851 */ 4852 ETHIOPIC, 4853 4854 /** 4855 * Unicode script "Cherokee". 4856 */ 4857 CHEROKEE, 4858 4859 /** 4860 * Unicode script "Canadian_Aboriginal". 4861 */ 4862 CANADIAN_ABORIGINAL, 4863 4864 /** 4865 * Unicode script "Ogham". 4866 */ 4867 OGHAM, 4868 4869 /** 4870 * Unicode script "Runic". 4871 */ 4872 RUNIC, 4873 4874 /** 4875 * Unicode script "Khmer". 4876 */ 4877 KHMER, 4878 4879 /** 4880 * Unicode script "Mongolian". 4881 */ 4882 MONGOLIAN, 4883 4884 /** 4885 * Unicode script "Hiragana". 
4886 */ 4887 HIRAGANA, 4888 4889 /** 4890 * Unicode script "Katakana". 4891 */ 4892 KATAKANA, 4893 4894 /** 4895 * Unicode script "Bopomofo". 4896 */ 4897 BOPOMOFO, 4898 4899 /** 4900 * Unicode script "Han". 4901 */ 4902 HAN, 4903 4904 /** 4905 * Unicode script "Yi". 4906 */ 4907 YI, 4908 4909 /** 4910 * Unicode script "Old_Italic". 4911 */ 4912 OLD_ITALIC, 4913 4914 /** 4915 * Unicode script "Gothic". 4916 */ 4917 GOTHIC, 4918 4919 /** 4920 * Unicode script "Deseret". 4921 */ 4922 DESERET, 4923 4924 /** 4925 * Unicode script "Inherited". 4926 */ 4927 INHERITED, 4928 4929 /** 4930 * Unicode script "Tagalog". 4931 */ 4932 TAGALOG, 4933 4934 /** 4935 * Unicode script "Hanunoo". 4936 */ 4937 HANUNOO, 4938 4939 /** 4940 * Unicode script "Buhid". 4941 */ 4942 BUHID, 4943 4944 /** 4945 * Unicode script "Tagbanwa". 4946 */ 4947 TAGBANWA, 4948 4949 /** 4950 * Unicode script "Limbu". 4951 */ 4952 LIMBU, 4953 4954 /** 4955 * Unicode script "Tai_Le". 4956 */ 4957 TAI_LE, 4958 4959 /** 4960 * Unicode script "Linear_B". 4961 */ 4962 LINEAR_B, 4963 4964 /** 4965 * Unicode script "Ugaritic". 4966 */ 4967 UGARITIC, 4968 4969 /** 4970 * Unicode script "Shavian". 4971 */ 4972 SHAVIAN, 4973 4974 /** 4975 * Unicode script "Osmanya". 4976 */ 4977 OSMANYA, 4978 4979 /** 4980 * Unicode script "Cypriot". 4981 */ 4982 CYPRIOT, 4983 4984 /** 4985 * Unicode script "Braille". 4986 */ 4987 BRAILLE, 4988 4989 /** 4990 * Unicode script "Buginese". 4991 */ 4992 BUGINESE, 4993 4994 /** 4995 * Unicode script "Coptic". 4996 */ 4997 COPTIC, 4998 4999 /** 5000 * Unicode script "New_Tai_Lue". 5001 */ 5002 NEW_TAI_LUE, 5003 5004 /** 5005 * Unicode script "Glagolitic". 5006 */ 5007 GLAGOLITIC, 5008 5009 /** 5010 * Unicode script "Tifinagh". 5011 */ 5012 TIFINAGH, 5013 5014 /** 5015 * Unicode script "Syloti_Nagri". 5016 */ 5017 SYLOTI_NAGRI, 5018 5019 /** 5020 * Unicode script "Old_Persian". 5021 */ 5022 OLD_PERSIAN, 5023 5024 /** 5025 * Unicode script "Kharoshthi". 
5026 */ 5027 KHAROSHTHI, 5028 5029 /** 5030 * Unicode script "Balinese". 5031 */ 5032 BALINESE, 5033 5034 /** 5035 * Unicode script "Cuneiform". 5036 */ 5037 CUNEIFORM, 5038 5039 /** 5040 * Unicode script "Phoenician". 5041 */ 5042 PHOENICIAN, 5043 5044 /** 5045 * Unicode script "Phags_Pa". 5046 */ 5047 PHAGS_PA, 5048 5049 /** 5050 * Unicode script "Nko". 5051 */ 5052 NKO, 5053 5054 /** 5055 * Unicode script "Sundanese". 5056 */ 5057 SUNDANESE, 5058 5059 /** 5060 * Unicode script "Batak". 5061 */ 5062 BATAK, 5063 5064 /** 5065 * Unicode script "Lepcha". 5066 */ 5067 LEPCHA, 5068 5069 /** 5070 * Unicode script "Ol_Chiki". 5071 */ 5072 OL_CHIKI, 5073 5074 /** 5075 * Unicode script "Vai". 5076 */ 5077 VAI, 5078 5079 /** 5080 * Unicode script "Saurashtra". 5081 */ 5082 SAURASHTRA, 5083 5084 /** 5085 * Unicode script "Kayah_Li". 5086 */ 5087 KAYAH_LI, 5088 5089 /** 5090 * Unicode script "Rejang". 5091 */ 5092 REJANG, 5093 5094 /** 5095 * Unicode script "Lycian". 5096 */ 5097 LYCIAN, 5098 5099 /** 5100 * Unicode script "Carian". 5101 */ 5102 CARIAN, 5103 5104 /** 5105 * Unicode script "Lydian". 5106 */ 5107 LYDIAN, 5108 5109 /** 5110 * Unicode script "Cham". 5111 */ 5112 CHAM, 5113 5114 /** 5115 * Unicode script "Tai_Tham". 5116 */ 5117 TAI_THAM, 5118 5119 /** 5120 * Unicode script "Tai_Viet". 5121 */ 5122 TAI_VIET, 5123 5124 /** 5125 * Unicode script "Avestan". 5126 */ 5127 AVESTAN, 5128 5129 /** 5130 * Unicode script "Egyptian_Hieroglyphs". 5131 */ 5132 EGYPTIAN_HIEROGLYPHS, 5133 5134 /** 5135 * Unicode script "Samaritan". 5136 */ 5137 SAMARITAN, 5138 5139 /** 5140 * Unicode script "Mandaic". 5141 */ 5142 MANDAIC, 5143 5144 /** 5145 * Unicode script "Lisu". 5146 */ 5147 LISU, 5148 5149 /** 5150 * Unicode script "Bamum". 5151 */ 5152 BAMUM, 5153 5154 /** 5155 * Unicode script "Javanese". 5156 */ 5157 JAVANESE, 5158 5159 /** 5160 * Unicode script "Meetei_Mayek". 5161 */ 5162 MEETEI_MAYEK, 5163 5164 /** 5165 * Unicode script "Imperial_Aramaic". 
5166 */ 5167 IMPERIAL_ARAMAIC, 5168 5169 /** 5170 * Unicode script "Old_South_Arabian". 5171 */ 5172 OLD_SOUTH_ARABIAN, 5173 5174 /** 5175 * Unicode script "Inscriptional_Parthian". 5176 */ 5177 INSCRIPTIONAL_PARTHIAN, 5178 5179 /** 5180 * Unicode script "Inscriptional_Pahlavi". 5181 */ 5182 INSCRIPTIONAL_PAHLAVI, 5183 5184 /** 5185 * Unicode script "Old_Turkic". 5186 */ 5187 OLD_TURKIC, 5188 5189 /** 5190 * Unicode script "Brahmi". 5191 */ 5192 BRAHMI, 5193 5194 /** 5195 * Unicode script "Kaithi". 5196 */ 5197 KAITHI, 5198 5199 /** 5200 * Unicode script "Meroitic Hieroglyphs". 5201 * @since 1.8 5202 */ 5203 MEROITIC_HIEROGLYPHS, 5204 5205 /** 5206 * Unicode script "Meroitic Cursive". 5207 * @since 1.8 5208 */ 5209 MEROITIC_CURSIVE, 5210 5211 /** 5212 * Unicode script "Sora Sompeng". 5213 * @since 1.8 5214 */ 5215 SORA_SOMPENG, 5216 5217 /** 5218 * Unicode script "Chakma". 5219 * @since 1.8 5220 */ 5221 CHAKMA, 5222 5223 /** 5224 * Unicode script "Sharada". 5225 * @since 1.8 5226 */ 5227 SHARADA, 5228 5229 /** 5230 * Unicode script "Takri". 5231 * @since 1.8 5232 */ 5233 TAKRI, 5234 5235 /** 5236 * Unicode script "Miao". 5237 * @since 1.8 5238 */ 5239 MIAO, 5240 5241 /** 5242 * Unicode script "Caucasian Albanian". 5243 * @since 9 5244 */ 5245 CAUCASIAN_ALBANIAN, 5246 5247 /** 5248 * Unicode script "Bassa Vah". 5249 * @since 9 5250 */ 5251 BASSA_VAH, 5252 5253 /** 5254 * Unicode script "Duployan". 5255 * @since 9 5256 */ 5257 DUPLOYAN, 5258 5259 /** 5260 * Unicode script "Elbasan". 5261 * @since 9 5262 */ 5263 ELBASAN, 5264 5265 /** 5266 * Unicode script "Grantha". 5267 * @since 9 5268 */ 5269 GRANTHA, 5270 5271 /** 5272 * Unicode script "Pahawh Hmong". 5273 * @since 9 5274 */ 5275 PAHAWH_HMONG, 5276 5277 /** 5278 * Unicode script "Khojki". 5279 * @since 9 5280 */ 5281 KHOJKI, 5282 5283 /** 5284 * Unicode script "Linear A". 5285 * @since 9 5286 */ 5287 LINEAR_A, 5288 5289 /** 5290 * Unicode script "Mahajani". 
5291 * @since 9 5292 */ 5293 MAHAJANI, 5294 5295 /** 5296 * Unicode script "Manichaean". 5297 * @since 9 5298 */ 5299 MANICHAEAN, 5300 5301 /** 5302 * Unicode script "Mende Kikakui". 5303 * @since 9 5304 */ 5305 MENDE_KIKAKUI, 5306 5307 /** 5308 * Unicode script "Modi". 5309 * @since 9 5310 */ 5311 MODI, 5312 5313 /** 5314 * Unicode script "Mro". 5315 * @since 9 5316 */ 5317 MRO, 5318 5319 /** 5320 * Unicode script "Old North Arabian". 5321 * @since 9 5322 */ 5323 OLD_NORTH_ARABIAN, 5324 5325 /** 5326 * Unicode script "Nabataean". 5327 * @since 9 5328 */ 5329 NABATAEAN, 5330 5331 /** 5332 * Unicode script "Palmyrene". 5333 * @since 9 5334 */ 5335 PALMYRENE, 5336 5337 /** 5338 * Unicode script "Pau Cin Hau". 5339 * @since 9 5340 */ 5341 PAU_CIN_HAU, 5342 5343 /** 5344 * Unicode script "Old Permic". 5345 * @since 9 5346 */ 5347 OLD_PERMIC, 5348 5349 /** 5350 * Unicode script "Psalter Pahlavi". 5351 * @since 9 5352 */ 5353 PSALTER_PAHLAVI, 5354 5355 /** 5356 * Unicode script "Siddham". 5357 * @since 9 5358 */ 5359 SIDDHAM, 5360 5361 /** 5362 * Unicode script "Khudawadi". 5363 * @since 9 5364 */ 5365 KHUDAWADI, 5366 5367 /** 5368 * Unicode script "Tirhuta". 5369 * @since 9 5370 */ 5371 TIRHUTA, 5372 5373 /** 5374 * Unicode script "Warang Citi". 5375 * @since 9 5376 */ 5377 WARANG_CITI, 5378 5379 /** 5380 * Unicode script "Ahom". 5381 * @since 9 5382 */ 5383 AHOM, 5384 5385 /** 5386 * Unicode script "Anatolian Hieroglyphs". 5387 * @since 9 5388 */ 5389 ANATOLIAN_HIEROGLYPHS, 5390 5391 /** 5392 * Unicode script "Hatran". 5393 * @since 9 5394 */ 5395 HATRAN, 5396 5397 /** 5398 * Unicode script "Multani". 5399 * @since 9 5400 */ 5401 MULTANI, 5402 5403 /** 5404 * Unicode script "Old Hungarian". 5405 * @since 9 5406 */ 5407 OLD_HUNGARIAN, 5408 5409 /** 5410 * Unicode script "SignWriting". 5411 * @since 9 5412 */ 5413 SIGNWRITING, 5414 5415 /** 5416 * Unicode script "Adlam". 5417 * @since 11 5418 */ 5419 ADLAM, 5420 5421 /** 5422 * Unicode script "Bhaiksuki". 
5423 * @since 11 5424 */ 5425 BHAIKSUKI, 5426 5427 /** 5428 * Unicode script "Marchen". 5429 * @since 11 5430 */ 5431 MARCHEN, 5432 5433 /** 5434 * Unicode script "Newa". 5435 * @since 11 5436 */ 5437 NEWA, 5438 5439 /** 5440 * Unicode script "Osage". 5441 * @since 11 5442 */ 5443 OSAGE, 5444 5445 /** 5446 * Unicode script "Tangut". 5447 * @since 11 5448 */ 5449 TANGUT, 5450 5451 /** 5452 * Unicode script "Masaram Gondi". 5453 * @since 11 5454 */ 5455 MASARAM_GONDI, 5456 5457 /** 5458 * Unicode script "Nushu". 5459 * @since 11 5460 */ 5461 NUSHU, 5462 5463 /** 5464 * Unicode script "Soyombo". 5465 * @since 11 5466 */ 5467 SOYOMBO, 5468 5469 /** 5470 * Unicode script "Zanabazar Square". 5471 * @since 11 5472 */ 5473 ZANABAZAR_SQUARE, 5474 5475 /** 5476 * Unicode script "Hanifi Rohingya". 5477 * @since 12 5478 */ 5479 HANIFI_ROHINGYA, 5480 5481 /** 5482 * Unicode script "Old Sogdian". 5483 * @since 12 5484 */ 5485 OLD_SOGDIAN, 5486 5487 /** 5488 * Unicode script "Sogdian". 5489 * @since 12 5490 */ 5491 SOGDIAN, 5492 5493 /** 5494 * Unicode script "Dogra". 5495 * @since 12 5496 */ 5497 DOGRA, 5498 5499 /** 5500 * Unicode script "Gunjala Gondi". 5501 * @since 12 5502 */ 5503 GUNJALA_GONDI, 5504 5505 /** 5506 * Unicode script "Makasar". 5507 * @since 12 5508 */ 5509 MAKASAR, 5510 5511 /** 5512 * Unicode script "Medefaidrin". 5513 * @since 12 5514 */ 5515 MEDEFAIDRIN, 5516 5517 /** 5518 * Unicode script "Elymaic". 5519 * @since 13 5520 */ 5521 ELYMAIC, 5522 5523 /** 5524 * Unicode script "Nandinagari". 5525 * @since 13 5526 */ 5527 NANDINAGARI, 5528 5529 /** 5530 * Unicode script "Nyiakeng Puachue Hmong". 5531 * @since 13 5532 */ 5533 NYIAKENG_PUACHUE_HMONG, 5534 5535 /** 5536 * Unicode script "Wancho". 5537 * @since 13 5538 */ 5539 WANCHO, 5540 5541 /** 5542 * Unicode script "Yezidi". 5543 * @since 15 5544 */ 5545 YEZIDI, 5546 5547 /** 5548 * Unicode script "Chorasmian". 5549 * @since 15 5550 */ 5551 CHORASMIAN, 5552 5553 /** 5554 * Unicode script "Dives Akuru". 
5555 * @since 15 5556 */ 5557 DIVES_AKURU, 5558 5559 /** 5560 * Unicode script "Khitan Small Script". 5561 * @since 15 5562 */ 5563 KHITAN_SMALL_SCRIPT, 5564 5565 /** 5566 * Unicode script "Vithkuqi". 5567 * @since 19 5568 */ 5569 VITHKUQI, 5570 5571 /** 5572 * Unicode script "Old Uyghur". 5573 * @since 19 5574 */ 5575 OLD_UYGHUR, 5576 5577 /** 5578 * Unicode script "Cypro Minoan". 5579 * @since 19 5580 */ 5581 CYPRO_MINOAN, 5582 5583 /** 5584 * Unicode script "Tangsa". 5585 * @since 19 5586 */ 5587 TANGSA, 5588 5589 /** 5590 * Unicode script "Toto". 5591 * @since 19 5592 */ 5593 TOTO, 5594 5595 /** 5596 * Unicode script "Kawi". 5597 * @since 20 5598 */ 5599 KAWI, 5600 5601 /** 5602 * Unicode script "Nag Mundari". 5603 * @since 20 5604 */ 5605 NAG_MUNDARI, 5606 5607 /** 5608 * Unicode script "Todhri". 5609 * @since 24 5610 */ 5611 TODHRI, 5612 5613 /** 5614 * Unicode script "Garay". 5615 * @since 24 5616 */ 5617 GARAY, 5618 5619 /** 5620 * Unicode script "Tulu Tigalari". 5621 * @since 24 5622 */ 5623 TULU_TIGALARI, 5624 5625 /** 5626 * Unicode script "Sunuwar". 5627 * @since 24 5628 */ 5629 SUNUWAR, 5630 5631 /** 5632 * Unicode script "Gurung Khema". 5633 * @since 24 5634 */ 5635 GURUNG_KHEMA, 5636 5637 /** 5638 * Unicode script "Kirat Rai". 5639 * @since 24 5640 */ 5641 KIRAT_RAI, 5642 5643 /** 5644 * Unicode script "Ol Onal". 5645 * @since 24 5646 */ 5647 OL_ONAL, 5648 5649 /** 5650 * Unicode script "Sidetic". 5651 * @since 26 5652 */ 5653 SIDETIC, 5654 5655 /** 5656 * Unicode script "Tolong Siki". 5657 * @since 26 5658 */ 5659 TOLONG_SIKI, 5660 5661 /** 5662 * Unicode script "Beria Erfe". 5663 * @since 26 5664 */ 5665 BERIA_ERFE, 5666 5667 /** 5668 * Unicode script "Tai Yo". 5669 * @since 26 5670 */ 5671 TAI_YO, 5672 5673 /** 5674 * Unicode script "Unknown". 5675 */ 5676 UNKNOWN; // must be the last enum constant for calculating the size of "aliases" hash map. 
5677 5678 private static final int[] scriptStarts = { 5679 0x0000, // 0000..0040; COMMON 5680 0x0041, // 0041..005A; LATIN 5681 0x005B, // 005B..0060; COMMON 5682 0x0061, // 0061..007A; LATIN 5683 0x007B, // 007B..00A9; COMMON 5684 0x00AA, // 00AA ; LATIN 5685 0x00AB, // 00AB..00B9; COMMON 5686 0x00BA, // 00BA ; LATIN 5687 0x00BB, // 00BB..00BF; COMMON 5688 0x00C0, // 00C0..00D6; LATIN 5689 0x00D7, // 00D7 ; COMMON 5690 0x00D8, // 00D8..00F6; LATIN 5691 0x00F7, // 00F7 ; COMMON 5692 0x00F8, // 00F8..02B8; LATIN 5693 0x02B9, // 02B9..02DF; COMMON 5694 0x02E0, // 02E0..02E4; LATIN 5695 0x02E5, // 02E5..02E9; COMMON 5696 0x02EA, // 02EA..02EB; BOPOMOFO 5697 0x02EC, // 02EC..02FF; COMMON 5698 0x0300, // 0300..036F; INHERITED 5699 0x0370, // 0370..0373; GREEK 5700 0x0374, // 0374 ; COMMON 5701 0x0375, // 0375..0377; GREEK 5702 0x0378, // 0378..0379; UNKNOWN 5703 0x037A, // 037A..037D; GREEK 5704 0x037E, // 037E ; COMMON 5705 0x037F, // 037F ; GREEK 5706 0x0380, // 0380..0383; UNKNOWN 5707 0x0384, // 0384 ; GREEK 5708 0x0385, // 0385 ; COMMON 5709 0x0386, // 0386 ; GREEK 5710 0x0387, // 0387 ; COMMON 5711 0x0388, // 0388..038A; GREEK 5712 0x038B, // 038B ; UNKNOWN 5713 0x038C, // 038C ; GREEK 5714 0x038D, // 038D ; UNKNOWN 5715 0x038E, // 038E..03A1; GREEK 5716 0x03A2, // 03A2 ; UNKNOWN 5717 0x03A3, // 03A3..03E1; GREEK 5718 0x03E2, // 03E2..03EF; COPTIC 5719 0x03F0, // 03F0..03FF; GREEK 5720 0x0400, // 0400..0484; CYRILLIC 5721 0x0485, // 0485..0486; INHERITED 5722 0x0487, // 0487..052F; CYRILLIC 5723 0x0530, // 0530 ; UNKNOWN 5724 0x0531, // 0531..0556; ARMENIAN 5725 0x0557, // 0557..0558; UNKNOWN 5726 0x0559, // 0559..058A; ARMENIAN 5727 0x058B, // 058B..058C; UNKNOWN 5728 0x058D, // 058D..058F; ARMENIAN 5729 0x0590, // 0590 ; UNKNOWN 5730 0x0591, // 0591..05C7; HEBREW 5731 0x05C8, // 05C8..05CF; UNKNOWN 5732 0x05D0, // 05D0..05EA; HEBREW 5733 0x05EB, // 05EB..05EE; UNKNOWN 5734 0x05EF, // 05EF..05F4; HEBREW 5735 0x05F5, // 05F5..05FF; UNKNOWN 5736 0x0600, // 
0600..0604; ARABIC 5737 0x0605, // 0605 ; COMMON 5738 0x0606, // 0606..060B; ARABIC 5739 0x060C, // 060C ; COMMON 5740 0x060D, // 060D..061A; ARABIC 5741 0x061B, // 061B ; COMMON 5742 0x061C, // 061C..061E; ARABIC 5743 0x061F, // 061F ; COMMON 5744 0x0620, // 0620..063F; ARABIC 5745 0x0640, // 0640 ; COMMON 5746 0x0641, // 0641..064A; ARABIC 5747 0x064B, // 064B..0655; INHERITED 5748 0x0656, // 0656..066F; ARABIC 5749 0x0670, // 0670 ; INHERITED 5750 0x0671, // 0671..06DC; ARABIC 5751 0x06DD, // 06DD ; COMMON 5752 0x06DE, // 06DE..06FF; ARABIC 5753 0x0700, // 0700..070D; SYRIAC 5754 0x070E, // 070E ; UNKNOWN 5755 0x070F, // 070F..074A; SYRIAC 5756 0x074B, // 074B..074C; UNKNOWN 5757 0x074D, // 074D..074F; SYRIAC 5758 0x0750, // 0750..077F; ARABIC 5759 0x0780, // 0780..07B1; THAANA 5760 0x07B2, // 07B2..07BF; UNKNOWN 5761 0x07C0, // 07C0..07FA; NKO 5762 0x07FB, // 07FB..07FC; UNKNOWN 5763 0x07FD, // 07FD..07FF; NKO 5764 0x0800, // 0800..082D; SAMARITAN 5765 0x082E, // 082E..082F; UNKNOWN 5766 0x0830, // 0830..083E; SAMARITAN 5767 0x083F, // 083F ; UNKNOWN 5768 0x0840, // 0840..085B; MANDAIC 5769 0x085C, // 085C..085D; UNKNOWN 5770 0x085E, // 085E ; MANDAIC 5771 0x085F, // 085F ; UNKNOWN 5772 0x0860, // 0860..086A; SYRIAC 5773 0x086B, // 086B..086F; UNKNOWN 5774 0x0870, // 0870..0891; ARABIC 5775 0x0892, // 0892..0896; UNKNOWN 5776 0x0897, // 0897..08E1; ARABIC 5777 0x08E2, // 08E2 ; COMMON 5778 0x08E3, // 08E3..08FF; ARABIC 5779 0x0900, // 0900..0950; DEVANAGARI 5780 0x0951, // 0951..0954; INHERITED 5781 0x0955, // 0955..0963; DEVANAGARI 5782 0x0964, // 0964..0965; COMMON 5783 0x0966, // 0966..097F; DEVANAGARI 5784 0x0980, // 0980..0983; BENGALI 5785 0x0984, // 0984 ; UNKNOWN 5786 0x0985, // 0985..098C; BENGALI 5787 0x098D, // 098D..098E; UNKNOWN 5788 0x098F, // 098F..0990; BENGALI 5789 0x0991, // 0991..0992; UNKNOWN 5790 0x0993, // 0993..09A8; BENGALI 5791 0x09A9, // 09A9 ; UNKNOWN 5792 0x09AA, // 09AA..09B0; BENGALI 5793 0x09B1, // 09B1 ; UNKNOWN 5794 0x09B2, // 
09B2 ; BENGALI 5795 0x09B3, // 09B3..09B5; UNKNOWN 5796 0x09B6, // 09B6..09B9; BENGALI 5797 0x09BA, // 09BA..09BB; UNKNOWN 5798 0x09BC, // 09BC..09C4; BENGALI 5799 0x09C5, // 09C5..09C6; UNKNOWN 5800 0x09C7, // 09C7..09C8; BENGALI 5801 0x09C9, // 09C9..09CA; UNKNOWN 5802 0x09CB, // 09CB..09CE; BENGALI 5803 0x09CF, // 09CF..09D6; UNKNOWN 5804 0x09D7, // 09D7 ; BENGALI 5805 0x09D8, // 09D8..09DB; UNKNOWN 5806 0x09DC, // 09DC..09DD; BENGALI 5807 0x09DE, // 09DE ; UNKNOWN 5808 0x09DF, // 09DF..09E3; BENGALI 5809 0x09E4, // 09E4..09E5; UNKNOWN 5810 0x09E6, // 09E6..09FE; BENGALI 5811 0x09FF, // 09FF..0A00; UNKNOWN 5812 0x0A01, // 0A01..0A03; GURMUKHI 5813 0x0A04, // 0A04 ; UNKNOWN 5814 0x0A05, // 0A05..0A0A; GURMUKHI 5815 0x0A0B, // 0A0B..0A0E; UNKNOWN 5816 0x0A0F, // 0A0F..0A10; GURMUKHI 5817 0x0A11, // 0A11..0A12; UNKNOWN 5818 0x0A13, // 0A13..0A28; GURMUKHI 5819 0x0A29, // 0A29 ; UNKNOWN 5820 0x0A2A, // 0A2A..0A30; GURMUKHI 5821 0x0A31, // 0A31 ; UNKNOWN 5822 0x0A32, // 0A32..0A33; GURMUKHI 5823 0x0A34, // 0A34 ; UNKNOWN 5824 0x0A35, // 0A35..0A36; GURMUKHI 5825 0x0A37, // 0A37 ; UNKNOWN 5826 0x0A38, // 0A38..0A39; GURMUKHI 5827 0x0A3A, // 0A3A..0A3B; UNKNOWN 5828 0x0A3C, // 0A3C ; GURMUKHI 5829 0x0A3D, // 0A3D ; UNKNOWN 5830 0x0A3E, // 0A3E..0A42; GURMUKHI 5831 0x0A43, // 0A43..0A46; UNKNOWN 5832 0x0A47, // 0A47..0A48; GURMUKHI 5833 0x0A49, // 0A49..0A4A; UNKNOWN 5834 0x0A4B, // 0A4B..0A4D; GURMUKHI 5835 0x0A4E, // 0A4E..0A50; UNKNOWN 5836 0x0A51, // 0A51 ; GURMUKHI 5837 0x0A52, // 0A52..0A58; UNKNOWN 5838 0x0A59, // 0A59..0A5C; GURMUKHI 5839 0x0A5D, // 0A5D ; UNKNOWN 5840 0x0A5E, // 0A5E ; GURMUKHI 5841 0x0A5F, // 0A5F..0A65; UNKNOWN 5842 0x0A66, // 0A66..0A76; GURMUKHI 5843 0x0A77, // 0A77..0A80; UNKNOWN 5844 0x0A81, // 0A81..0A83; GUJARATI 5845 0x0A84, // 0A84 ; UNKNOWN 5846 0x0A85, // 0A85..0A8D; GUJARATI 5847 0x0A8E, // 0A8E ; UNKNOWN 5848 0x0A8F, // 0A8F..0A91; GUJARATI 5849 0x0A92, // 0A92 ; UNKNOWN 5850 0x0A93, // 0A93..0AA8; GUJARATI 5851 0x0AA9, // 0AA9 ; 
UNKNOWN 5852 0x0AAA, // 0AAA..0AB0; GUJARATI 5853 0x0AB1, // 0AB1 ; UNKNOWN 5854 0x0AB2, // 0AB2..0AB3; GUJARATI 5855 0x0AB4, // 0AB4 ; UNKNOWN 5856 0x0AB5, // 0AB5..0AB9; GUJARATI 5857 0x0ABA, // 0ABA..0ABB; UNKNOWN 5858 0x0ABC, // 0ABC..0AC5; GUJARATI 5859 0x0AC6, // 0AC6 ; UNKNOWN 5860 0x0AC7, // 0AC7..0AC9; GUJARATI 5861 0x0ACA, // 0ACA ; UNKNOWN 5862 0x0ACB, // 0ACB..0ACD; GUJARATI 5863 0x0ACE, // 0ACE..0ACF; UNKNOWN 5864 0x0AD0, // 0AD0 ; GUJARATI 5865 0x0AD1, // 0AD1..0ADF; UNKNOWN 5866 0x0AE0, // 0AE0..0AE3; GUJARATI 5867 0x0AE4, // 0AE4..0AE5; UNKNOWN 5868 0x0AE6, // 0AE6..0AF1; GUJARATI 5869 0x0AF2, // 0AF2..0AF8; UNKNOWN 5870 0x0AF9, // 0AF9..0AFF; GUJARATI 5871 0x0B00, // 0B00 ; UNKNOWN 5872 0x0B01, // 0B01..0B03; ORIYA 5873 0x0B04, // 0B04 ; UNKNOWN 5874 0x0B05, // 0B05..0B0C; ORIYA 5875 0x0B0D, // 0B0D..0B0E; UNKNOWN 5876 0x0B0F, // 0B0F..0B10; ORIYA 5877 0x0B11, // 0B11..0B12; UNKNOWN 5878 0x0B13, // 0B13..0B28; ORIYA 5879 0x0B29, // 0B29 ; UNKNOWN 5880 0x0B2A, // 0B2A..0B30; ORIYA 5881 0x0B31, // 0B31 ; UNKNOWN 5882 0x0B32, // 0B32..0B33; ORIYA 5883 0x0B34, // 0B34 ; UNKNOWN 5884 0x0B35, // 0B35..0B39; ORIYA 5885 0x0B3A, // 0B3A..0B3B; UNKNOWN 5886 0x0B3C, // 0B3C..0B44; ORIYA 5887 0x0B45, // 0B45..0B46; UNKNOWN 5888 0x0B47, // 0B47..0B48; ORIYA 5889 0x0B49, // 0B49..0B4A; UNKNOWN 5890 0x0B4B, // 0B4B..0B4D; ORIYA 5891 0x0B4E, // 0B4E..0B54; UNKNOWN 5892 0x0B55, // 0B55..0B57; ORIYA 5893 0x0B58, // 0B58..0B5B; UNKNOWN 5894 0x0B5C, // 0B5C..0B5D; ORIYA 5895 0x0B5E, // 0B5E ; UNKNOWN 5896 0x0B5F, // 0B5F..0B63; ORIYA 5897 0x0B64, // 0B64..0B65; UNKNOWN 5898 0x0B66, // 0B66..0B77; ORIYA 5899 0x0B78, // 0B78..0B81; UNKNOWN 5900 0x0B82, // 0B82..0B83; TAMIL 5901 0x0B84, // 0B84 ; UNKNOWN 5902 0x0B85, // 0B85..0B8A; TAMIL 5903 0x0B8B, // 0B8B..0B8D; UNKNOWN 5904 0x0B8E, // 0B8E..0B90; TAMIL 5905 0x0B91, // 0B91 ; UNKNOWN 5906 0x0B92, // 0B92..0B95; TAMIL 5907 0x0B96, // 0B96..0B98; UNKNOWN 5908 0x0B99, // 0B99..0B9A; TAMIL 5909 0x0B9B, // 0B9B ; UNKNOWN 
5910 0x0B9C, // 0B9C ; TAMIL 5911 0x0B9D, // 0B9D ; UNKNOWN 5912 0x0B9E, // 0B9E..0B9F; TAMIL 5913 0x0BA0, // 0BA0..0BA2; UNKNOWN 5914 0x0BA3, // 0BA3..0BA4; TAMIL 5915 0x0BA5, // 0BA5..0BA7; UNKNOWN 5916 0x0BA8, // 0BA8..0BAA; TAMIL 5917 0x0BAB, // 0BAB..0BAD; UNKNOWN 5918 0x0BAE, // 0BAE..0BB9; TAMIL 5919 0x0BBA, // 0BBA..0BBD; UNKNOWN 5920 0x0BBE, // 0BBE..0BC2; TAMIL 5921 0x0BC3, // 0BC3..0BC5; UNKNOWN 5922 0x0BC6, // 0BC6..0BC8; TAMIL 5923 0x0BC9, // 0BC9 ; UNKNOWN 5924 0x0BCA, // 0BCA..0BCD; TAMIL 5925 0x0BCE, // 0BCE..0BCF; UNKNOWN 5926 0x0BD0, // 0BD0 ; TAMIL 5927 0x0BD1, // 0BD1..0BD6; UNKNOWN 5928 0x0BD7, // 0BD7 ; TAMIL 5929 0x0BD8, // 0BD8..0BE5; UNKNOWN 5930 0x0BE6, // 0BE6..0BFA; TAMIL 5931 0x0BFB, // 0BFB..0BFF; UNKNOWN 5932 0x0C00, // 0C00..0C0C; TELUGU 5933 0x0C0D, // 0C0D ; UNKNOWN 5934 0x0C0E, // 0C0E..0C10; TELUGU 5935 0x0C11, // 0C11 ; UNKNOWN 5936 0x0C12, // 0C12..0C28; TELUGU 5937 0x0C29, // 0C29 ; UNKNOWN 5938 0x0C2A, // 0C2A..0C39; TELUGU 5939 0x0C3A, // 0C3A..0C3B; UNKNOWN 5940 0x0C3C, // 0C3C..0C44; TELUGU 5941 0x0C45, // 0C45 ; UNKNOWN 5942 0x0C46, // 0C46..0C48; TELUGU 5943 0x0C49, // 0C49 ; UNKNOWN 5944 0x0C4A, // 0C4A..0C4D; TELUGU 5945 0x0C4E, // 0C4E..0C54; UNKNOWN 5946 0x0C55, // 0C55..0C56; TELUGU 5947 0x0C57, // 0C57 ; UNKNOWN 5948 0x0C58, // 0C58..0C5A; TELUGU 5949 0x0C5B, // 0C5B ; UNKNOWN 5950 0x0C5C, // 0C5C..0C5D; TELUGU 5951 0x0C5E, // 0C5E..0C5F; UNKNOWN 5952 0x0C60, // 0C60..0C63; TELUGU 5953 0x0C64, // 0C64..0C65; UNKNOWN 5954 0x0C66, // 0C66..0C6F; TELUGU 5955 0x0C70, // 0C70..0C76; UNKNOWN 5956 0x0C77, // 0C77..0C7F; TELUGU 5957 0x0C80, // 0C80..0C8C; KANNADA 5958 0x0C8D, // 0C8D ; UNKNOWN 5959 0x0C8E, // 0C8E..0C90; KANNADA 5960 0x0C91, // 0C91 ; UNKNOWN 5961 0x0C92, // 0C92..0CA8; KANNADA 5962 0x0CA9, // 0CA9 ; UNKNOWN 5963 0x0CAA, // 0CAA..0CB3; KANNADA 5964 0x0CB4, // 0CB4 ; UNKNOWN 5965 0x0CB5, // 0CB5..0CB9; KANNADA 5966 0x0CBA, // 0CBA..0CBB; UNKNOWN 5967 0x0CBC, // 0CBC..0CC4; KANNADA 5968 0x0CC5, // 0CC5 ; 
UNKNOWN 5969 0x0CC6, // 0CC6..0CC8; KANNADA 5970 0x0CC9, // 0CC9 ; UNKNOWN 5971 0x0CCA, // 0CCA..0CCD; KANNADA 5972 0x0CCE, // 0CCE..0CD4; UNKNOWN 5973 0x0CD5, // 0CD5..0CD6; KANNADA 5974 0x0CD7, // 0CD7..0CDB; UNKNOWN 5975 0x0CDC, // 0CDC..0CDE; KANNADA 5976 0x0CDF, // 0CDF ; UNKNOWN 5977 0x0CE0, // 0CE0..0CE3; KANNADA 5978 0x0CE4, // 0CE4..0CE5; UNKNOWN 5979 0x0CE6, // 0CE6..0CEF; KANNADA 5980 0x0CF0, // 0CF0 ; UNKNOWN 5981 0x0CF1, // 0CF1..0CF3; KANNADA 5982 0x0CF4, // 0CF4..0CFF; UNKNOWN 5983 0x0D00, // 0D00..0D0C; MALAYALAM 5984 0x0D0D, // 0D0D ; UNKNOWN 5985 0x0D0E, // 0D0E..0D10; MALAYALAM 5986 0x0D11, // 0D11 ; UNKNOWN 5987 0x0D12, // 0D12..0D44; MALAYALAM 5988 0x0D45, // 0D45 ; UNKNOWN 5989 0x0D46, // 0D46..0D48; MALAYALAM 5990 0x0D49, // 0D49 ; UNKNOWN 5991 0x0D4A, // 0D4A..0D4F; MALAYALAM 5992 0x0D50, // 0D50..0D53; UNKNOWN 5993 0x0D54, // 0D54..0D63; MALAYALAM 5994 0x0D64, // 0D64..0D65; UNKNOWN 5995 0x0D66, // 0D66..0D7F; MALAYALAM 5996 0x0D80, // 0D80 ; UNKNOWN 5997 0x0D81, // 0D81..0D83; SINHALA 5998 0x0D84, // 0D84 ; UNKNOWN 5999 0x0D85, // 0D85..0D96; SINHALA 6000 0x0D97, // 0D97..0D99; UNKNOWN 6001 0x0D9A, // 0D9A..0DB1; SINHALA 6002 0x0DB2, // 0DB2 ; UNKNOWN 6003 0x0DB3, // 0DB3..0DBB; SINHALA 6004 0x0DBC, // 0DBC ; UNKNOWN 6005 0x0DBD, // 0DBD ; SINHALA 6006 0x0DBE, // 0DBE..0DBF; UNKNOWN 6007 0x0DC0, // 0DC0..0DC6; SINHALA 6008 0x0DC7, // 0DC7..0DC9; UNKNOWN 6009 0x0DCA, // 0DCA ; SINHALA 6010 0x0DCB, // 0DCB..0DCE; UNKNOWN 6011 0x0DCF, // 0DCF..0DD4; SINHALA 6012 0x0DD5, // 0DD5 ; UNKNOWN 6013 0x0DD6, // 0DD6 ; SINHALA 6014 0x0DD7, // 0DD7 ; UNKNOWN 6015 0x0DD8, // 0DD8..0DDF; SINHALA 6016 0x0DE0, // 0DE0..0DE5; UNKNOWN 6017 0x0DE6, // 0DE6..0DEF; SINHALA 6018 0x0DF0, // 0DF0..0DF1; UNKNOWN 6019 0x0DF2, // 0DF2..0DF4; SINHALA 6020 0x0DF5, // 0DF5..0E00; UNKNOWN 6021 0x0E01, // 0E01..0E3A; THAI 6022 0x0E3B, // 0E3B..0E3E; UNKNOWN 6023 0x0E3F, // 0E3F ; COMMON 6024 0x0E40, // 0E40..0E5B; THAI 6025 0x0E5C, // 0E5C..0E80; UNKNOWN 6026 0x0E81, // 
0E81..0E82; LAO 6027 0x0E83, // 0E83 ; UNKNOWN 6028 0x0E84, // 0E84 ; LAO 6029 0x0E85, // 0E85 ; UNKNOWN 6030 0x0E86, // 0E86..0E8A; LAO 6031 0x0E8B, // 0E8B ; UNKNOWN 6032 0x0E8C, // 0E8C..0EA3; LAO 6033 0x0EA4, // 0EA4 ; UNKNOWN 6034 0x0EA5, // 0EA5 ; LAO 6035 0x0EA6, // 0EA6 ; UNKNOWN 6036 0x0EA7, // 0EA7..0EBD; LAO 6037 0x0EBE, // 0EBE..0EBF; UNKNOWN 6038 0x0EC0, // 0EC0..0EC4; LAO 6039 0x0EC5, // 0EC5 ; UNKNOWN 6040 0x0EC6, // 0EC6 ; LAO 6041 0x0EC7, // 0EC7 ; UNKNOWN 6042 0x0EC8, // 0EC8..0ECE; LAO 6043 0x0ECF, // 0ECF ; UNKNOWN 6044 0x0ED0, // 0ED0..0ED9; LAO 6045 0x0EDA, // 0EDA..0EDB; UNKNOWN 6046 0x0EDC, // 0EDC..0EDF; LAO 6047 0x0EE0, // 0EE0..0EFF; UNKNOWN 6048 0x0F00, // 0F00..0F47; TIBETAN 6049 0x0F48, // 0F48 ; UNKNOWN 6050 0x0F49, // 0F49..0F6C; TIBETAN 6051 0x0F6D, // 0F6D..0F70; UNKNOWN 6052 0x0F71, // 0F71..0F97; TIBETAN 6053 0x0F98, // 0F98 ; UNKNOWN 6054 0x0F99, // 0F99..0FBC; TIBETAN 6055 0x0FBD, // 0FBD ; UNKNOWN 6056 0x0FBE, // 0FBE..0FCC; TIBETAN 6057 0x0FCD, // 0FCD ; UNKNOWN 6058 0x0FCE, // 0FCE..0FD4; TIBETAN 6059 0x0FD5, // 0FD5..0FD8; COMMON 6060 0x0FD9, // 0FD9..0FDA; TIBETAN 6061 0x0FDB, // 0FDB..0FFF; UNKNOWN 6062 0x1000, // 1000..109F; MYANMAR 6063 0x10A0, // 10A0..10C5; GEORGIAN 6064 0x10C6, // 10C6 ; UNKNOWN 6065 0x10C7, // 10C7 ; GEORGIAN 6066 0x10C8, // 10C8..10CC; UNKNOWN 6067 0x10CD, // 10CD ; GEORGIAN 6068 0x10CE, // 10CE..10CF; UNKNOWN 6069 0x10D0, // 10D0..10FA; GEORGIAN 6070 0x10FB, // 10FB ; COMMON 6071 0x10FC, // 10FC..10FF; GEORGIAN 6072 0x1100, // 1100..11FF; HANGUL 6073 0x1200, // 1200..1248; ETHIOPIC 6074 0x1249, // 1249 ; UNKNOWN 6075 0x124A, // 124A..124D; ETHIOPIC 6076 0x124E, // 124E..124F; UNKNOWN 6077 0x1250, // 1250..1256; ETHIOPIC 6078 0x1257, // 1257 ; UNKNOWN 6079 0x1258, // 1258 ; ETHIOPIC 6080 0x1259, // 1259 ; UNKNOWN 6081 0x125A, // 125A..125D; ETHIOPIC 6082 0x125E, // 125E..125F; UNKNOWN 6083 0x1260, // 1260..1288; ETHIOPIC 6084 0x1289, // 1289 ; UNKNOWN 6085 0x128A, // 128A..128D; ETHIOPIC 6086 
0x128E, // 128E..128F; UNKNOWN 6087 0x1290, // 1290..12B0; ETHIOPIC 6088 0x12B1, // 12B1 ; UNKNOWN 6089 0x12B2, // 12B2..12B5; ETHIOPIC 6090 0x12B6, // 12B6..12B7; UNKNOWN 6091 0x12B8, // 12B8..12BE; ETHIOPIC 6092 0x12BF, // 12BF ; UNKNOWN 6093 0x12C0, // 12C0 ; ETHIOPIC 6094 0x12C1, // 12C1 ; UNKNOWN 6095 0x12C2, // 12C2..12C5; ETHIOPIC 6096 0x12C6, // 12C6..12C7; UNKNOWN 6097 0x12C8, // 12C8..12D6; ETHIOPIC 6098 0x12D7, // 12D7 ; UNKNOWN 6099 0x12D8, // 12D8..1310; ETHIOPIC 6100 0x1311, // 1311 ; UNKNOWN 6101 0x1312, // 1312..1315; ETHIOPIC 6102 0x1316, // 1316..1317; UNKNOWN 6103 0x1318, // 1318..135A; ETHIOPIC 6104 0x135B, // 135B..135C; UNKNOWN 6105 0x135D, // 135D..137C; ETHIOPIC 6106 0x137D, // 137D..137F; UNKNOWN 6107 0x1380, // 1380..1399; ETHIOPIC 6108 0x139A, // 139A..139F; UNKNOWN 6109 0x13A0, // 13A0..13F5; CHEROKEE 6110 0x13F6, // 13F6..13F7; UNKNOWN 6111 0x13F8, // 13F8..13FD; CHEROKEE 6112 0x13FE, // 13FE..13FF; UNKNOWN 6113 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 6114 0x1680, // 1680..169C; OGHAM 6115 0x169D, // 169D..169F; UNKNOWN 6116 0x16A0, // 16A0..16EA; RUNIC 6117 0x16EB, // 16EB..16ED; COMMON 6118 0x16EE, // 16EE..16F8; RUNIC 6119 0x16F9, // 16F9..16FF; UNKNOWN 6120 0x1700, // 1700..1715; TAGALOG 6121 0x1716, // 1716..171E; UNKNOWN 6122 0x171F, // 171F ; TAGALOG 6123 0x1720, // 1720..1734; HANUNOO 6124 0x1735, // 1735..1736; COMMON 6125 0x1737, // 1737..173F; UNKNOWN 6126 0x1740, // 1740..1753; BUHID 6127 0x1754, // 1754..175F; UNKNOWN 6128 0x1760, // 1760..176C; TAGBANWA 6129 0x176D, // 176D ; UNKNOWN 6130 0x176E, // 176E..1770; TAGBANWA 6131 0x1771, // 1771 ; UNKNOWN 6132 0x1772, // 1772..1773; TAGBANWA 6133 0x1774, // 1774..177F; UNKNOWN 6134 0x1780, // 1780..17DD; KHMER 6135 0x17DE, // 17DE..17DF; UNKNOWN 6136 0x17E0, // 17E0..17E9; KHMER 6137 0x17EA, // 17EA..17EF; UNKNOWN 6138 0x17F0, // 17F0..17F9; KHMER 6139 0x17FA, // 17FA..17FF; UNKNOWN 6140 0x1800, // 1800..1801; MONGOLIAN 6141 0x1802, // 1802..1803; COMMON 6142 0x1804, // 1804 
; MONGOLIAN 6143 0x1805, // 1805 ; COMMON 6144 0x1806, // 1806..1819; MONGOLIAN 6145 0x181A, // 181A..181F; UNKNOWN 6146 0x1820, // 1820..1878; MONGOLIAN 6147 0x1879, // 1879..187F; UNKNOWN 6148 0x1880, // 1880..18AA; MONGOLIAN 6149 0x18AB, // 18AB..18AF; UNKNOWN 6150 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL 6151 0x18F6, // 18F6..18FF; UNKNOWN 6152 0x1900, // 1900..191E; LIMBU 6153 0x191F, // 191F ; UNKNOWN 6154 0x1920, // 1920..192B; LIMBU 6155 0x192C, // 192C..192F; UNKNOWN 6156 0x1930, // 1930..193B; LIMBU 6157 0x193C, // 193C..193F; UNKNOWN 6158 0x1940, // 1940 ; LIMBU 6159 0x1941, // 1941..1943; UNKNOWN 6160 0x1944, // 1944..194F; LIMBU 6161 0x1950, // 1950..196D; TAI_LE 6162 0x196E, // 196E..196F; UNKNOWN 6163 0x1970, // 1970..1974; TAI_LE 6164 0x1975, // 1975..197F; UNKNOWN 6165 0x1980, // 1980..19AB; NEW_TAI_LUE 6166 0x19AC, // 19AC..19AF; UNKNOWN 6167 0x19B0, // 19B0..19C9; NEW_TAI_LUE 6168 0x19CA, // 19CA..19CF; UNKNOWN 6169 0x19D0, // 19D0..19DA; NEW_TAI_LUE 6170 0x19DB, // 19DB..19DD; UNKNOWN 6171 0x19DE, // 19DE..19DF; NEW_TAI_LUE 6172 0x19E0, // 19E0..19FF; KHMER 6173 0x1A00, // 1A00..1A1B; BUGINESE 6174 0x1A1C, // 1A1C..1A1D; UNKNOWN 6175 0x1A1E, // 1A1E..1A1F; BUGINESE 6176 0x1A20, // 1A20..1A5E; TAI_THAM 6177 0x1A5F, // 1A5F ; UNKNOWN 6178 0x1A60, // 1A60..1A7C; TAI_THAM 6179 0x1A7D, // 1A7D..1A7E; UNKNOWN 6180 0x1A7F, // 1A7F..1A89; TAI_THAM 6181 0x1A8A, // 1A8A..1A8F; UNKNOWN 6182 0x1A90, // 1A90..1A99; TAI_THAM 6183 0x1A9A, // 1A9A..1A9F; UNKNOWN 6184 0x1AA0, // 1AA0..1AAD; TAI_THAM 6185 0x1AAE, // 1AAE..1AAF; UNKNOWN 6186 0x1AB0, // 1AB0..1ADD; INHERITED 6187 0x1ADE, // 1ADE..1ADF; UNKNOWN 6188 0x1AE0, // 1AE0..1AEB; INHERITED 6189 0x1AEC, // 1AEC..1AFF; UNKNOWN 6190 0x1B00, // 1B00..1B4C; BALINESE 6191 0x1B4D, // 1B4D ; UNKNOWN 6192 0x1B4E, // 1B4E..1B7F; BALINESE 6193 0x1B80, // 1B80..1BBF; SUNDANESE 6194 0x1BC0, // 1BC0..1BF3; BATAK 6195 0x1BF4, // 1BF4..1BFB; UNKNOWN 6196 0x1BFC, // 1BFC..1BFF; BATAK 6197 0x1C00, // 1C00..1C37; LEPCHA 
6198 0x1C38, // 1C38..1C3A; UNKNOWN 6199 0x1C3B, // 1C3B..1C49; LEPCHA 6200 0x1C4A, // 1C4A..1C4C; UNKNOWN 6201 0x1C4D, // 1C4D..1C4F; LEPCHA 6202 0x1C50, // 1C50..1C7F; OL_CHIKI 6203 0x1C80, // 1C80..1C8A; CYRILLIC 6204 0x1C8B, // 1C8B..1C8F; UNKNOWN 6205 0x1C90, // 1C90..1CBA; GEORGIAN 6206 0x1CBB, // 1CBB..1CBC; UNKNOWN 6207 0x1CBD, // 1CBD..1CBF; GEORGIAN 6208 0x1CC0, // 1CC0..1CC7; SUNDANESE 6209 0x1CC8, // 1CC8..1CCF; UNKNOWN 6210 0x1CD0, // 1CD0..1CD2; INHERITED 6211 0x1CD3, // 1CD3 ; COMMON 6212 0x1CD4, // 1CD4..1CE0; INHERITED 6213 0x1CE1, // 1CE1 ; COMMON 6214 0x1CE2, // 1CE2..1CE8; INHERITED 6215 0x1CE9, // 1CE9..1CEC; COMMON 6216 0x1CED, // 1CED ; INHERITED 6217 0x1CEE, // 1CEE..1CF3; COMMON 6218 0x1CF4, // 1CF4 ; INHERITED 6219 0x1CF5, // 1CF5..1CF7; COMMON 6220 0x1CF8, // 1CF8..1CF9; INHERITED 6221 0x1CFA, // 1CFA ; COMMON 6222 0x1CFB, // 1CFB..1CFF; UNKNOWN 6223 0x1D00, // 1D00..1D25; LATIN 6224 0x1D26, // 1D26..1D2A; GREEK 6225 0x1D2B, // 1D2B ; CYRILLIC 6226 0x1D2C, // 1D2C..1D5C; LATIN 6227 0x1D5D, // 1D5D..1D61; GREEK 6228 0x1D62, // 1D62..1D65; LATIN 6229 0x1D66, // 1D66..1D6A; GREEK 6230 0x1D6B, // 1D6B..1D77; LATIN 6231 0x1D78, // 1D78 ; CYRILLIC 6232 0x1D79, // 1D79..1DBE; LATIN 6233 0x1DBF, // 1DBF ; GREEK 6234 0x1DC0, // 1DC0..1DFF; INHERITED 6235 0x1E00, // 1E00..1EFF; LATIN 6236 0x1F00, // 1F00..1F15; GREEK 6237 0x1F16, // 1F16..1F17; UNKNOWN 6238 0x1F18, // 1F18..1F1D; GREEK 6239 0x1F1E, // 1F1E..1F1F; UNKNOWN 6240 0x1F20, // 1F20..1F45; GREEK 6241 0x1F46, // 1F46..1F47; UNKNOWN 6242 0x1F48, // 1F48..1F4D; GREEK 6243 0x1F4E, // 1F4E..1F4F; UNKNOWN 6244 0x1F50, // 1F50..1F57; GREEK 6245 0x1F58, // 1F58 ; UNKNOWN 6246 0x1F59, // 1F59 ; GREEK 6247 0x1F5A, // 1F5A ; UNKNOWN 6248 0x1F5B, // 1F5B ; GREEK 6249 0x1F5C, // 1F5C ; UNKNOWN 6250 0x1F5D, // 1F5D ; GREEK 6251 0x1F5E, // 1F5E ; UNKNOWN 6252 0x1F5F, // 1F5F..1F7D; GREEK 6253 0x1F7E, // 1F7E..1F7F; UNKNOWN 6254 0x1F80, // 1F80..1FB4; GREEK 6255 0x1FB5, // 1FB5 ; UNKNOWN 6256 0x1FB6, // 
1FB6..1FC4; GREEK 6257 0x1FC5, // 1FC5 ; UNKNOWN 6258 0x1FC6, // 1FC6..1FD3; GREEK 6259 0x1FD4, // 1FD4..1FD5; UNKNOWN 6260 0x1FD6, // 1FD6..1FDB; GREEK 6261 0x1FDC, // 1FDC ; UNKNOWN 6262 0x1FDD, // 1FDD..1FEF; GREEK 6263 0x1FF0, // 1FF0..1FF1; UNKNOWN 6264 0x1FF2, // 1FF2..1FF4; GREEK 6265 0x1FF5, // 1FF5 ; UNKNOWN 6266 0x1FF6, // 1FF6..1FFE; GREEK 6267 0x1FFF, // 1FFF ; UNKNOWN 6268 0x2000, // 2000..200B; COMMON 6269 0x200C, // 200C..200D; INHERITED 6270 0x200E, // 200E..2064; COMMON 6271 0x2065, // 2065 ; UNKNOWN 6272 0x2066, // 2066..2070; COMMON 6273 0x2071, // 2071 ; LATIN 6274 0x2072, // 2072..2073; UNKNOWN 6275 0x2074, // 2074..207E; COMMON 6276 0x207F, // 207F ; LATIN 6277 0x2080, // 2080..208E; COMMON 6278 0x208F, // 208F ; UNKNOWN 6279 0x2090, // 2090..209C; LATIN 6280 0x209D, // 209D..209F; UNKNOWN 6281 0x20A0, // 20A0..20C1; COMMON 6282 0x20C2, // 20C2..20CF; UNKNOWN 6283 0x20D0, // 20D0..20F0; INHERITED 6284 0x20F1, // 20F1..20FF; UNKNOWN 6285 0x2100, // 2100..2125; COMMON 6286 0x2126, // 2126 ; GREEK 6287 0x2127, // 2127..2129; COMMON 6288 0x212A, // 212A..212B; LATIN 6289 0x212C, // 212C..2131; COMMON 6290 0x2132, // 2132 ; LATIN 6291 0x2133, // 2133..214D; COMMON 6292 0x214E, // 214E ; LATIN 6293 0x214F, // 214F..215F; COMMON 6294 0x2160, // 2160..2188; LATIN 6295 0x2189, // 2189..218B; COMMON 6296 0x218C, // 218C..218F; UNKNOWN 6297 0x2190, // 2190..2429; COMMON 6298 0x242A, // 242A..243F; UNKNOWN 6299 0x2440, // 2440..244A; COMMON 6300 0x244B, // 244B..245F; UNKNOWN 6301 0x2460, // 2460..27FF; COMMON 6302 0x2800, // 2800..28FF; BRAILLE 6303 0x2900, // 2900..2B73; COMMON 6304 0x2B74, // 2B74..2B75; UNKNOWN 6305 0x2B76, // 2B76..2BFF; COMMON 6306 0x2C00, // 2C00..2C5F; GLAGOLITIC 6307 0x2C60, // 2C60..2C7F; LATIN 6308 0x2C80, // 2C80..2CF3; COPTIC 6309 0x2CF4, // 2CF4..2CF8; UNKNOWN 6310 0x2CF9, // 2CF9..2CFF; COPTIC 6311 0x2D00, // 2D00..2D25; GEORGIAN 6312 0x2D26, // 2D26 ; UNKNOWN 6313 0x2D27, // 2D27 ; GEORGIAN 6314 0x2D28, // 2D28..2D2C; 
UNKNOWN 6315 0x2D2D, // 2D2D ; GEORGIAN 6316 0x2D2E, // 2D2E..2D2F; UNKNOWN 6317 0x2D30, // 2D30..2D67; TIFINAGH 6318 0x2D68, // 2D68..2D6E; UNKNOWN 6319 0x2D6F, // 2D6F..2D70; TIFINAGH 6320 0x2D71, // 2D71..2D7E; UNKNOWN 6321 0x2D7F, // 2D7F ; TIFINAGH 6322 0x2D80, // 2D80..2D96; ETHIOPIC 6323 0x2D97, // 2D97..2D9F; UNKNOWN 6324 0x2DA0, // 2DA0..2DA6; ETHIOPIC 6325 0x2DA7, // 2DA7 ; UNKNOWN 6326 0x2DA8, // 2DA8..2DAE; ETHIOPIC 6327 0x2DAF, // 2DAF ; UNKNOWN 6328 0x2DB0, // 2DB0..2DB6; ETHIOPIC 6329 0x2DB7, // 2DB7 ; UNKNOWN 6330 0x2DB8, // 2DB8..2DBE; ETHIOPIC 6331 0x2DBF, // 2DBF ; UNKNOWN 6332 0x2DC0, // 2DC0..2DC6; ETHIOPIC 6333 0x2DC7, // 2DC7 ; UNKNOWN 6334 0x2DC8, // 2DC8..2DCE; ETHIOPIC 6335 0x2DCF, // 2DCF ; UNKNOWN 6336 0x2DD0, // 2DD0..2DD6; ETHIOPIC 6337 0x2DD7, // 2DD7 ; UNKNOWN 6338 0x2DD8, // 2DD8..2DDE; ETHIOPIC 6339 0x2DDF, // 2DDF ; UNKNOWN 6340 0x2DE0, // 2DE0..2DFF; CYRILLIC 6341 0x2E00, // 2E00..2E5D; COMMON 6342 0x2E5E, // 2E5E..2E7F; UNKNOWN 6343 0x2E80, // 2E80..2E99; HAN 6344 0x2E9A, // 2E9A ; UNKNOWN 6345 0x2E9B, // 2E9B..2EF3; HAN 6346 0x2EF4, // 2EF4..2EFF; UNKNOWN 6347 0x2F00, // 2F00..2FD5; HAN 6348 0x2FD6, // 2FD6..2FEF; UNKNOWN 6349 0x2FF0, // 2FF0..3004; COMMON 6350 0x3005, // 3005 ; HAN 6351 0x3006, // 3006 ; COMMON 6352 0x3007, // 3007 ; HAN 6353 0x3008, // 3008..3020; COMMON 6354 0x3021, // 3021..3029; HAN 6355 0x302A, // 302A..302D; INHERITED 6356 0x302E, // 302E..302F; HANGUL 6357 0x3030, // 3030..3037; COMMON 6358 0x3038, // 3038..303B; HAN 6359 0x303C, // 303C..303F; COMMON 6360 0x3040, // 3040 ; UNKNOWN 6361 0x3041, // 3041..3096; HIRAGANA 6362 0x3097, // 3097..3098; UNKNOWN 6363 0x3099, // 3099..309A; INHERITED 6364 0x309B, // 309B..309C; COMMON 6365 0x309D, // 309D..309F; HIRAGANA 6366 0x30A0, // 30A0 ; COMMON 6367 0x30A1, // 30A1..30FA; KATAKANA 6368 0x30FB, // 30FB..30FC; COMMON 6369 0x30FD, // 30FD..30FF; KATAKANA 6370 0x3100, // 3100..3104; UNKNOWN 6371 0x3105, // 3105..312F; BOPOMOFO 6372 0x3130, // 3130 ; UNKNOWN 
6373 0x3131, // 3131..318E; HANGUL 6374 0x318F, // 318F ; UNKNOWN 6375 0x3190, // 3190..319F; COMMON 6376 0x31A0, // 31A0..31BF; BOPOMOFO 6377 0x31C0, // 31C0..31E5; COMMON 6378 0x31E6, // 31E6..31EE; UNKNOWN 6379 0x31EF, // 31EF ; COMMON 6380 0x31F0, // 31F0..31FF; KATAKANA 6381 0x3200, // 3200..321E; HANGUL 6382 0x321F, // 321F ; UNKNOWN 6383 0x3220, // 3220..325F; COMMON 6384 0x3260, // 3260..327E; HANGUL 6385 0x327F, // 327F..32CF; COMMON 6386 0x32D0, // 32D0..32FE; KATAKANA 6387 0x32FF, // 32FF ; COMMON 6388 0x3300, // 3300..3357; KATAKANA 6389 0x3358, // 3358..33FF; COMMON 6390 0x3400, // 3400..4DBF; HAN 6391 0x4DC0, // 4DC0..4DFF; COMMON 6392 0x4E00, // 4E00..9FFF; HAN 6393 0xA000, // A000..A48C; YI 6394 0xA48D, // A48D..A48F; UNKNOWN 6395 0xA490, // A490..A4C6; YI 6396 0xA4C7, // A4C7..A4CF; UNKNOWN 6397 0xA4D0, // A4D0..A4FF; LISU 6398 0xA500, // A500..A62B; VAI 6399 0xA62C, // A62C..A63F; UNKNOWN 6400 0xA640, // A640..A69F; CYRILLIC 6401 0xA6A0, // A6A0..A6F7; BAMUM 6402 0xA6F8, // A6F8..A6FF; UNKNOWN 6403 0xA700, // A700..A721; COMMON 6404 0xA722, // A722..A787; LATIN 6405 0xA788, // A788..A78A; COMMON 6406 0xA78B, // A78B..A7DC; LATIN 6407 0xA7DD, // A7DD..A7F0; UNKNOWN 6408 0xA7F1, // A7F1..A7FF; LATIN 6409 0xA800, // A800..A82C; SYLOTI_NAGRI 6410 0xA82D, // A82D..A82F; UNKNOWN 6411 0xA830, // A830..A839; COMMON 6412 0xA83A, // A83A..A83F; UNKNOWN 6413 0xA840, // A840..A877; PHAGS_PA 6414 0xA878, // A878..A87F; UNKNOWN 6415 0xA880, // A880..A8C5; SAURASHTRA 6416 0xA8C6, // A8C6..A8CD; UNKNOWN 6417 0xA8CE, // A8CE..A8D9; SAURASHTRA 6418 0xA8DA, // A8DA..A8DF; UNKNOWN 6419 0xA8E0, // A8E0..A8FF; DEVANAGARI 6420 0xA900, // A900..A92D; KAYAH_LI 6421 0xA92E, // A92E ; COMMON 6422 0xA92F, // A92F ; KAYAH_LI 6423 0xA930, // A930..A953; REJANG 6424 0xA954, // A954..A95E; UNKNOWN 6425 0xA95F, // A95F ; REJANG 6426 0xA960, // A960..A97C; HANGUL 6427 0xA97D, // A97D..A97F; UNKNOWN 6428 0xA980, // A980..A9CD; JAVANESE 6429 0xA9CE, // A9CE ; UNKNOWN 6430 0xA9CF, // 
A9CF ; COMMON 6431 0xA9D0, // A9D0..A9D9; JAVANESE 6432 0xA9DA, // A9DA..A9DD; UNKNOWN 6433 0xA9DE, // A9DE..A9DF; JAVANESE 6434 0xA9E0, // A9E0..A9FE; MYANMAR 6435 0xA9FF, // A9FF ; UNKNOWN 6436 0xAA00, // AA00..AA36; CHAM 6437 0xAA37, // AA37..AA3F; UNKNOWN 6438 0xAA40, // AA40..AA4D; CHAM 6439 0xAA4E, // AA4E..AA4F; UNKNOWN 6440 0xAA50, // AA50..AA59; CHAM 6441 0xAA5A, // AA5A..AA5B; UNKNOWN 6442 0xAA5C, // AA5C..AA5F; CHAM 6443 0xAA60, // AA60..AA7F; MYANMAR 6444 0xAA80, // AA80..AAC2; TAI_VIET 6445 0xAAC3, // AAC3..AADA; UNKNOWN 6446 0xAADB, // AADB..AADF; TAI_VIET 6447 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK 6448 0xAAF7, // AAF7..AB00; UNKNOWN 6449 0xAB01, // AB01..AB06; ETHIOPIC 6450 0xAB07, // AB07..AB08; UNKNOWN 6451 0xAB09, // AB09..AB0E; ETHIOPIC 6452 0xAB0F, // AB0F..AB10; UNKNOWN 6453 0xAB11, // AB11..AB16; ETHIOPIC 6454 0xAB17, // AB17..AB1F; UNKNOWN 6455 0xAB20, // AB20..AB26; ETHIOPIC 6456 0xAB27, // AB27 ; UNKNOWN 6457 0xAB28, // AB28..AB2E; ETHIOPIC 6458 0xAB2F, // AB2F ; UNKNOWN 6459 0xAB30, // AB30..AB5A; LATIN 6460 0xAB5B, // AB5B ; COMMON 6461 0xAB5C, // AB5C..AB64; LATIN 6462 0xAB65, // AB65 ; GREEK 6463 0xAB66, // AB66..AB69; LATIN 6464 0xAB6A, // AB6A..AB6B; COMMON 6465 0xAB6C, // AB6C..AB6F; UNKNOWN 6466 0xAB70, // AB70..ABBF; CHEROKEE 6467 0xABC0, // ABC0..ABED; MEETEI_MAYEK 6468 0xABEE, // ABEE..ABEF; UNKNOWN 6469 0xABF0, // ABF0..ABF9; MEETEI_MAYEK 6470 0xABFA, // ABFA..ABFF; UNKNOWN 6471 0xAC00, // AC00..D7A3; HANGUL 6472 0xD7A4, // D7A4..D7AF; UNKNOWN 6473 0xD7B0, // D7B0..D7C6; HANGUL 6474 0xD7C7, // D7C7..D7CA; UNKNOWN 6475 0xD7CB, // D7CB..D7FB; HANGUL 6476 0xD7FC, // D7FC..F8FF; UNKNOWN 6477 0xF900, // F900..FA6D; HAN 6478 0xFA6E, // FA6E..FA6F; UNKNOWN 6479 0xFA70, // FA70..FAD9; HAN 6480 0xFADA, // FADA..FAFF; UNKNOWN 6481 0xFB00, // FB00..FB06; LATIN 6482 0xFB07, // FB07..FB12; UNKNOWN 6483 0xFB13, // FB13..FB17; ARMENIAN 6484 0xFB18, // FB18..FB1C; UNKNOWN 6485 0xFB1D, // FB1D..FB36; HEBREW 6486 0xFB37, // FB37 ; UNKNOWN 6487 
0xFB38, // FB38..FB3C; HEBREW 6488 0xFB3D, // FB3D ; UNKNOWN 6489 0xFB3E, // FB3E ; HEBREW 6490 0xFB3F, // FB3F ; UNKNOWN 6491 0xFB40, // FB40..FB41; HEBREW 6492 0xFB42, // FB42 ; UNKNOWN 6493 0xFB43, // FB43..FB44; HEBREW 6494 0xFB45, // FB45 ; UNKNOWN 6495 0xFB46, // FB46..FB4F; HEBREW 6496 0xFB50, // FB50..FD3D; ARABIC 6497 0xFD3E, // FD3E..FD3F; COMMON 6498 0xFD40, // FD40..FDCF; ARABIC 6499 0xFDD0, // FDD0..FDEF; UNKNOWN 6500 0xFDF0, // FDF0..FDFF; ARABIC 6501 0xFE00, // FE00..FE0F; INHERITED 6502 0xFE10, // FE10..FE19; COMMON 6503 0xFE1A, // FE1A..FE1F; UNKNOWN 6504 0xFE20, // FE20..FE2D; INHERITED 6505 0xFE2E, // FE2E..FE2F; CYRILLIC 6506 0xFE30, // FE30..FE52; COMMON 6507 0xFE53, // FE53 ; UNKNOWN 6508 0xFE54, // FE54..FE66; COMMON 6509 0xFE67, // FE67 ; UNKNOWN 6510 0xFE68, // FE68..FE6B; COMMON 6511 0xFE6C, // FE6C..FE6F; UNKNOWN 6512 0xFE70, // FE70..FE74; ARABIC 6513 0xFE75, // FE75 ; UNKNOWN 6514 0xFE76, // FE76..FEFC; ARABIC 6515 0xFEFD, // FEFD..FEFE; UNKNOWN 6516 0xFEFF, // FEFF ; COMMON 6517 0xFF00, // FF00 ; UNKNOWN 6518 0xFF01, // FF01..FF20; COMMON 6519 0xFF21, // FF21..FF3A; LATIN 6520 0xFF3B, // FF3B..FF40; COMMON 6521 0xFF41, // FF41..FF5A; LATIN 6522 0xFF5B, // FF5B..FF65; COMMON 6523 0xFF66, // FF66..FF6F; KATAKANA 6524 0xFF70, // FF70 ; COMMON 6525 0xFF71, // FF71..FF9D; KATAKANA 6526 0xFF9E, // FF9E..FF9F; COMMON 6527 0xFFA0, // FFA0..FFBE; HANGUL 6528 0xFFBF, // FFBF..FFC1; UNKNOWN 6529 0xFFC2, // FFC2..FFC7; HANGUL 6530 0xFFC8, // FFC8..FFC9; UNKNOWN 6531 0xFFCA, // FFCA..FFCF; HANGUL 6532 0xFFD0, // FFD0..FFD1; UNKNOWN 6533 0xFFD2, // FFD2..FFD7; HANGUL 6534 0xFFD8, // FFD8..FFD9; UNKNOWN 6535 0xFFDA, // FFDA..FFDC; HANGUL 6536 0xFFDD, // FFDD..FFDF; UNKNOWN 6537 0xFFE0, // FFE0..FFE6; COMMON 6538 0xFFE7, // FFE7 ; UNKNOWN 6539 0xFFE8, // FFE8..FFEE; COMMON 6540 0xFFEF, // FFEF..FFF8; UNKNOWN 6541 0xFFF9, // FFF9..FFFD; COMMON 6542 0xFFFE, // FFFE..FFFF; UNKNOWN 6543 0x10000, // 10000..1000B; LINEAR_B 6544 0x1000C, // 1000C ; UNKNOWN 
6545 0x1000D, // 1000D..10026; LINEAR_B 6546 0x10027, // 10027 ; UNKNOWN 6547 0x10028, // 10028..1003A; LINEAR_B 6548 0x1003B, // 1003B ; UNKNOWN 6549 0x1003C, // 1003C..1003D; LINEAR_B 6550 0x1003E, // 1003E ; UNKNOWN 6551 0x1003F, // 1003F..1004D; LINEAR_B 6552 0x1004E, // 1004E..1004F; UNKNOWN 6553 0x10050, // 10050..1005D; LINEAR_B 6554 0x1005E, // 1005E..1007F; UNKNOWN 6555 0x10080, // 10080..100FA; LINEAR_B 6556 0x100FB, // 100FB..100FF; UNKNOWN 6557 0x10100, // 10100..10102; COMMON 6558 0x10103, // 10103..10106; UNKNOWN 6559 0x10107, // 10107..10133; COMMON 6560 0x10134, // 10134..10136; UNKNOWN 6561 0x10137, // 10137..1013F; COMMON 6562 0x10140, // 10140..1018E; GREEK 6563 0x1018F, // 1018F ; UNKNOWN 6564 0x10190, // 10190..1019C; COMMON 6565 0x1019D, // 1019D..1019F; UNKNOWN 6566 0x101A0, // 101A0 ; GREEK 6567 0x101A1, // 101A1..101CF; UNKNOWN 6568 0x101D0, // 101D0..101FC; COMMON 6569 0x101FD, // 101FD ; INHERITED 6570 0x101FE, // 101FE..1027F; UNKNOWN 6571 0x10280, // 10280..1029C; LYCIAN 6572 0x1029D, // 1029D..1029F; UNKNOWN 6573 0x102A0, // 102A0..102D0; CARIAN 6574 0x102D1, // 102D1..102DF; UNKNOWN 6575 0x102E0, // 102E0 ; INHERITED 6576 0x102E1, // 102E1..102FB; COMMON 6577 0x102FC, // 102FC..102FF; UNKNOWN 6578 0x10300, // 10300..10323; OLD_ITALIC 6579 0x10324, // 10324..1032C; UNKNOWN 6580 0x1032D, // 1032D..1032F; OLD_ITALIC 6581 0x10330, // 10330..1034A; GOTHIC 6582 0x1034B, // 1034B..1034F; UNKNOWN 6583 0x10350, // 10350..1037A; OLD_PERMIC 6584 0x1037B, // 1037B..1037F; UNKNOWN 6585 0x10380, // 10380..1039D; UGARITIC 6586 0x1039E, // 1039E ; UNKNOWN 6587 0x1039F, // 1039F ; UGARITIC 6588 0x103A0, // 103A0..103C3; OLD_PERSIAN 6589 0x103C4, // 103C4..103C7; UNKNOWN 6590 0x103C8, // 103C8..103D5; OLD_PERSIAN 6591 0x103D6, // 103D6..103FF; UNKNOWN 6592 0x10400, // 10400..1044F; DESERET 6593 0x10450, // 10450..1047F; SHAVIAN 6594 0x10480, // 10480..1049D; OSMANYA 6595 0x1049E, // 1049E..1049F; UNKNOWN 6596 0x104A0, // 104A0..104A9; OSMANYA 6597 
0x104AA, // 104AA..104AF; UNKNOWN 6598 0x104B0, // 104B0..104D3; OSAGE 6599 0x104D4, // 104D4..104D7; UNKNOWN 6600 0x104D8, // 104D8..104FB; OSAGE 6601 0x104FC, // 104FC..104FF; UNKNOWN 6602 0x10500, // 10500..10527; ELBASAN 6603 0x10528, // 10528..1052F; UNKNOWN 6604 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN 6605 0x10564, // 10564..1056E; UNKNOWN 6606 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN 6607 0x10570, // 10570..1057A; VITHKUQI 6608 0x1057B, // 1057B ; UNKNOWN 6609 0x1057C, // 1057C..1058A; VITHKUQI 6610 0x1058B, // 1058B ; UNKNOWN 6611 0x1058C, // 1058C..10592; VITHKUQI 6612 0x10593, // 10593 ; UNKNOWN 6613 0x10594, // 10594..10595; VITHKUQI 6614 0x10596, // 10596 ; UNKNOWN 6615 0x10597, // 10597..105A1; VITHKUQI 6616 0x105A2, // 105A2 ; UNKNOWN 6617 0x105A3, // 105A3..105B1; VITHKUQI 6618 0x105B2, // 105B2 ; UNKNOWN 6619 0x105B3, // 105B3..105B9; VITHKUQI 6620 0x105BA, // 105BA ; UNKNOWN 6621 0x105BB, // 105BB..105BC; VITHKUQI 6622 0x105BD, // 105BD..105BF; UNKNOWN 6623 0x105C0, // 105C0..105F3; TODHRI 6624 0x105F4, // 105F4..105FF; UNKNOWN 6625 0x10600, // 10600..10736; LINEAR_A 6626 0x10737, // 10737..1073F; UNKNOWN 6627 0x10740, // 10740..10755; LINEAR_A 6628 0x10756, // 10756..1075F; UNKNOWN 6629 0x10760, // 10760..10767; LINEAR_A 6630 0x10768, // 10768..1077F; UNKNOWN 6631 0x10780, // 10780..10785; LATIN 6632 0x10786, // 10786 ; UNKNOWN 6633 0x10787, // 10787..107B0; LATIN 6634 0x107B1, // 107B1 ; UNKNOWN 6635 0x107B2, // 107B2..107BA; LATIN 6636 0x107BB, // 107BB..107FF; UNKNOWN 6637 0x10800, // 10800..10805; CYPRIOT 6638 0x10806, // 10806..10807; UNKNOWN 6639 0x10808, // 10808 ; CYPRIOT 6640 0x10809, // 10809 ; UNKNOWN 6641 0x1080A, // 1080A..10835; CYPRIOT 6642 0x10836, // 10836 ; UNKNOWN 6643 0x10837, // 10837..10838; CYPRIOT 6644 0x10839, // 10839..1083B; UNKNOWN 6645 0x1083C, // 1083C ; CYPRIOT 6646 0x1083D, // 1083D..1083E; UNKNOWN 6647 0x1083F, // 1083F ; CYPRIOT 6648 0x10840, // 10840..10855; IMPERIAL_ARAMAIC 6649 0x10856, // 10856 ; UNKNOWN 
6650 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC 6651 0x10860, // 10860..1087F; PALMYRENE 6652 0x10880, // 10880..1089E; NABATAEAN 6653 0x1089F, // 1089F..108A6; UNKNOWN 6654 0x108A7, // 108A7..108AF; NABATAEAN 6655 0x108B0, // 108B0..108DF; UNKNOWN 6656 0x108E0, // 108E0..108F2; HATRAN 6657 0x108F3, // 108F3 ; UNKNOWN 6658 0x108F4, // 108F4..108F5; HATRAN 6659 0x108F6, // 108F6..108FA; UNKNOWN 6660 0x108FB, // 108FB..108FF; HATRAN 6661 0x10900, // 10900..1091B; PHOENICIAN 6662 0x1091C, // 1091C..1091E; UNKNOWN 6663 0x1091F, // 1091F ; PHOENICIAN 6664 0x10920, // 10920..10939; LYDIAN 6665 0x1093A, // 1093A..1093E; UNKNOWN 6666 0x1093F, // 1093F ; LYDIAN 6667 0x10940, // 10940..10959; SIDETIC 6668 0x1095A, // 1095A..1097F; UNKNOWN 6669 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 6670 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE 6671 0x109B8, // 109B8..109BB; UNKNOWN 6672 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE 6673 0x109D0, // 109D0..109D1; UNKNOWN 6674 0x109D2, // 109D2..109FF; MEROITIC_CURSIVE 6675 0x10A00, // 10A00..10A03; KHAROSHTHI 6676 0x10A04, // 10A04 ; UNKNOWN 6677 0x10A05, // 10A05..10A06; KHAROSHTHI 6678 0x10A07, // 10A07..10A0B; UNKNOWN 6679 0x10A0C, // 10A0C..10A13; KHAROSHTHI 6680 0x10A14, // 10A14 ; UNKNOWN 6681 0x10A15, // 10A15..10A17; KHAROSHTHI 6682 0x10A18, // 10A18 ; UNKNOWN 6683 0x10A19, // 10A19..10A35; KHAROSHTHI 6684 0x10A36, // 10A36..10A37; UNKNOWN 6685 0x10A38, // 10A38..10A3A; KHAROSHTHI 6686 0x10A3B, // 10A3B..10A3E; UNKNOWN 6687 0x10A3F, // 10A3F..10A48; KHAROSHTHI 6688 0x10A49, // 10A49..10A4F; UNKNOWN 6689 0x10A50, // 10A50..10A58; KHAROSHTHI 6690 0x10A59, // 10A59..10A5F; UNKNOWN 6691 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN 6692 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN 6693 0x10AA0, // 10AA0..10ABF; UNKNOWN 6694 0x10AC0, // 10AC0..10AE6; MANICHAEAN 6695 0x10AE7, // 10AE7..10AEA; UNKNOWN 6696 0x10AEB, // 10AEB..10AF6; MANICHAEAN 6697 0x10AF7, // 10AF7..10AFF; UNKNOWN 6698 0x10B00, // 10B00..10B35; AVESTAN 6699 0x10B36, // 
10B36..10B38; UNKNOWN 6700 0x10B39, // 10B39..10B3F; AVESTAN 6701 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN 6702 0x10B56, // 10B56..10B57; UNKNOWN 6703 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN 6704 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI 6705 0x10B73, // 10B73..10B77; UNKNOWN 6706 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI 6707 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI 6708 0x10B92, // 10B92..10B98; UNKNOWN 6709 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI 6710 0x10B9D, // 10B9D..10BA8; UNKNOWN 6711 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI 6712 0x10BB0, // 10BB0..10BFF; UNKNOWN 6713 0x10C00, // 10C00..10C48; OLD_TURKIC 6714 0x10C49, // 10C49..10C7F; UNKNOWN 6715 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN 6716 0x10CB3, // 10CB3..10CBF; UNKNOWN 6717 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN 6718 0x10CF3, // 10CF3..10CF9; UNKNOWN 6719 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN 6720 0x10D00, // 10D00..10D27; HANIFI_ROHINGYA 6721 0x10D28, // 10D28..10D2F; UNKNOWN 6722 0x10D30, // 10D30..10D39; HANIFI_ROHINGYA 6723 0x10D3A, // 10D3A..10D3F; UNKNOWN 6724 0x10D40, // 10D40..10D65; GARAY 6725 0x10D66, // 10D66..10D68; UNKNOWN 6726 0x10D69, // 10D69..10D85; GARAY 6727 0x10D86, // 10D86..10D8D; UNKNOWN 6728 0x10D8E, // 10D8E..10D8F; GARAY 6729 0x10D90, // 10D90..10E5F; UNKNOWN 6730 0x10E60, // 10E60..10E7E; ARABIC 6731 0x10E7F, // 10E7F ; UNKNOWN 6732 0x10E80, // 10E80..10EA9; YEZIDI 6733 0x10EAA, // 10EAA ; UNKNOWN 6734 0x10EAB, // 10EAB..10EAD; YEZIDI 6735 0x10EAE, // 10EAE..10EAF; UNKNOWN 6736 0x10EB0, // 10EB0..10EB1; YEZIDI 6737 0x10EB2, // 10EB2..10EC1; UNKNOWN 6738 0x10EC2, // 10EC2..10EC7; ARABIC 6739 0x10EC8, // 10EC8..10ECF; UNKNOWN 6740 0x10ED0, // 10ED0..10ED8; ARABIC 6741 0x10ED9, // 10ED9..10EF9; UNKNOWN 6742 0x10EFA, // 10EFA..10EFF; ARABIC 6743 0x10F00, // 10F00..10F27; OLD_SOGDIAN 6744 0x10F28, // 10F28..10F2F; UNKNOWN 6745 0x10F30, // 10F30..10F59; SOGDIAN 6746 0x10F5A, // 10F5A..10F6F; UNKNOWN 6747 0x10F70, // 10F70..10F89; OLD_UYGHUR 6748 
0x10F8A, // 10F8A..10FAF; UNKNOWN 6749 0x10FB0, // 10FB0..10FCB; CHORASMIAN 6750 0x10FCC, // 10FCC..10FDF; UNKNOWN 6751 0x10FE0, // 10FE0..10FF6; ELYMAIC 6752 0x10FF7, // 10FF7..10FFF; UNKNOWN 6753 0x11000, // 11000..1104D; BRAHMI 6754 0x1104E, // 1104E..11051; UNKNOWN 6755 0x11052, // 11052..11075; BRAHMI 6756 0x11076, // 11076..1107E; UNKNOWN 6757 0x1107F, // 1107F ; BRAHMI 6758 0x11080, // 11080..110C2; KAITHI 6759 0x110C3, // 110C3..110CC; UNKNOWN 6760 0x110CD, // 110CD ; KAITHI 6761 0x110CE, // 110CE..110CF; UNKNOWN 6762 0x110D0, // 110D0..110E8; SORA_SOMPENG 6763 0x110E9, // 110E9..110EF; UNKNOWN 6764 0x110F0, // 110F0..110F9; SORA_SOMPENG 6765 0x110FA, // 110FA..110FF; UNKNOWN 6766 0x11100, // 11100..11134; CHAKMA 6767 0x11135, // 11135 ; UNKNOWN 6768 0x11136, // 11136..11147; CHAKMA 6769 0x11148, // 11148..1114F; UNKNOWN 6770 0x11150, // 11150..11176; MAHAJANI 6771 0x11177, // 11177..1117F; UNKNOWN 6772 0x11180, // 11180..111DF; SHARADA 6773 0x111E0, // 111E0 ; UNKNOWN 6774 0x111E1, // 111E1..111F4; SINHALA 6775 0x111F5, // 111F5..111FF; UNKNOWN 6776 0x11200, // 11200..11211; KHOJKI 6777 0x11212, // 11212 ; UNKNOWN 6778 0x11213, // 11213..11241; KHOJKI 6779 0x11242, // 11242..1127F; UNKNOWN 6780 0x11280, // 11280..11286; MULTANI 6781 0x11287, // 11287 ; UNKNOWN 6782 0x11288, // 11288 ; MULTANI 6783 0x11289, // 11289 ; UNKNOWN 6784 0x1128A, // 1128A..1128D; MULTANI 6785 0x1128E, // 1128E ; UNKNOWN 6786 0x1128F, // 1128F..1129D; MULTANI 6787 0x1129E, // 1129E ; UNKNOWN 6788 0x1129F, // 1129F..112A9; MULTANI 6789 0x112AA, // 112AA..112AF; UNKNOWN 6790 0x112B0, // 112B0..112EA; KHUDAWADI 6791 0x112EB, // 112EB..112EF; UNKNOWN 6792 0x112F0, // 112F0..112F9; KHUDAWADI 6793 0x112FA, // 112FA..112FF; UNKNOWN 6794 0x11300, // 11300..11303; GRANTHA 6795 0x11304, // 11304 ; UNKNOWN 6796 0x11305, // 11305..1130C; GRANTHA 6797 0x1130D, // 1130D..1130E; UNKNOWN 6798 0x1130F, // 1130F..11310; GRANTHA 6799 0x11311, // 11311..11312; UNKNOWN 6800 0x11313, // 11313..11328; 
GRANTHA 6801 0x11329, // 11329 ; UNKNOWN 6802 0x1132A, // 1132A..11330; GRANTHA 6803 0x11331, // 11331 ; UNKNOWN 6804 0x11332, // 11332..11333; GRANTHA 6805 0x11334, // 11334 ; UNKNOWN 6806 0x11335, // 11335..11339; GRANTHA 6807 0x1133A, // 1133A ; UNKNOWN 6808 0x1133B, // 1133B ; INHERITED 6809 0x1133C, // 1133C..11344; GRANTHA 6810 0x11345, // 11345..11346; UNKNOWN 6811 0x11347, // 11347..11348; GRANTHA 6812 0x11349, // 11349..1134A; UNKNOWN 6813 0x1134B, // 1134B..1134D; GRANTHA 6814 0x1134E, // 1134E..1134F; UNKNOWN 6815 0x11350, // 11350 ; GRANTHA 6816 0x11351, // 11351..11356; UNKNOWN 6817 0x11357, // 11357 ; GRANTHA 6818 0x11358, // 11358..1135C; UNKNOWN 6819 0x1135D, // 1135D..11363; GRANTHA 6820 0x11364, // 11364..11365; UNKNOWN 6821 0x11366, // 11366..1136C; GRANTHA 6822 0x1136D, // 1136D..1136F; UNKNOWN 6823 0x11370, // 11370..11374; GRANTHA 6824 0x11375, // 11375..1137F; UNKNOWN 6825 0x11380, // 11380..11389; TULU_TIGALARI 6826 0x1138A, // 1138A ; UNKNOWN 6827 0x1138B, // 1138B ; TULU_TIGALARI 6828 0x1138C, // 1138C..1138D; UNKNOWN 6829 0x1138E, // 1138E ; TULU_TIGALARI 6830 0x1138F, // 1138F ; UNKNOWN 6831 0x11390, // 11390..113B5; TULU_TIGALARI 6832 0x113B6, // 113B6 ; UNKNOWN 6833 0x113B7, // 113B7..113C0; TULU_TIGALARI 6834 0x113C1, // 113C1 ; UNKNOWN 6835 0x113C2, // 113C2 ; TULU_TIGALARI 6836 0x113C3, // 113C3..113C4; UNKNOWN 6837 0x113C5, // 113C5 ; TULU_TIGALARI 6838 0x113C6, // 113C6 ; UNKNOWN 6839 0x113C7, // 113C7..113CA; TULU_TIGALARI 6840 0x113CB, // 113CB ; UNKNOWN 6841 0x113CC, // 113CC..113D5; TULU_TIGALARI 6842 0x113D6, // 113D6 ; UNKNOWN 6843 0x113D7, // 113D7..113D8; TULU_TIGALARI 6844 0x113D9, // 113D9..113E0; UNKNOWN 6845 0x113E1, // 113E1..113E2; TULU_TIGALARI 6846 0x113E3, // 113E3..113FF; UNKNOWN 6847 0x11400, // 11400..1145B; NEWA 6848 0x1145C, // 1145C ; UNKNOWN 6849 0x1145D, // 1145D..11461; NEWA 6850 0x11462, // 11462..1147F; UNKNOWN 6851 0x11480, // 11480..114C7; TIRHUTA 6852 0x114C8, // 114C8..114CF; UNKNOWN 6853 0x114D0, 
// 114D0..114D9; TIRHUTA 6854 0x114DA, // 114DA..1157F; UNKNOWN 6855 0x11580, // 11580..115B5; SIDDHAM 6856 0x115B6, // 115B6..115B7; UNKNOWN 6857 0x115B8, // 115B8..115DD; SIDDHAM 6858 0x115DE, // 115DE..115FF; UNKNOWN 6859 0x11600, // 11600..11644; MODI 6860 0x11645, // 11645..1164F; UNKNOWN 6861 0x11650, // 11650..11659; MODI 6862 0x1165A, // 1165A..1165F; UNKNOWN 6863 0x11660, // 11660..1166C; MONGOLIAN 6864 0x1166D, // 1166D..1167F; UNKNOWN 6865 0x11680, // 11680..116B9; TAKRI 6866 0x116BA, // 116BA..116BF; UNKNOWN 6867 0x116C0, // 116C0..116C9; TAKRI 6868 0x116CA, // 116CA..116CF; UNKNOWN 6869 0x116D0, // 116D0..116E3; MYANMAR 6870 0x116E4, // 116E4..116FF; UNKNOWN 6871 0x11700, // 11700..1171A; AHOM 6872 0x1171B, // 1171B..1171C; UNKNOWN 6873 0x1171D, // 1171D..1172B; AHOM 6874 0x1172C, // 1172C..1172F; UNKNOWN 6875 0x11730, // 11730..11746; AHOM 6876 0x11747, // 11747..117FF; UNKNOWN 6877 0x11800, // 11800..1183B; DOGRA 6878 0x1183C, // 1183C..1189F; UNKNOWN 6879 0x118A0, // 118A0..118F2; WARANG_CITI 6880 0x118F3, // 118F3..118FE; UNKNOWN 6881 0x118FF, // 118FF ; WARANG_CITI 6882 0x11900, // 11900..11906; DIVES_AKURU 6883 0x11907, // 11907..11908; UNKNOWN 6884 0x11909, // 11909 ; DIVES_AKURU 6885 0x1190A, // 1190A..1190B; UNKNOWN 6886 0x1190C, // 1190C..11913; DIVES_AKURU 6887 0x11914, // 11914 ; UNKNOWN 6888 0x11915, // 11915..11916; DIVES_AKURU 6889 0x11917, // 11917 ; UNKNOWN 6890 0x11918, // 11918..11935; DIVES_AKURU 6891 0x11936, // 11936 ; UNKNOWN 6892 0x11937, // 11937..11938; DIVES_AKURU 6893 0x11939, // 11939..1193A; UNKNOWN 6894 0x1193B, // 1193B..11946; DIVES_AKURU 6895 0x11947, // 11947..1194F; UNKNOWN 6896 0x11950, // 11950..11959; DIVES_AKURU 6897 0x1195A, // 1195A..1199F; UNKNOWN 6898 0x119A0, // 119A0..119A7; NANDINAGARI 6899 0x119A8, // 119A8..119A9; UNKNOWN 6900 0x119AA, // 119AA..119D7; NANDINAGARI 6901 0x119D8, // 119D8..119D9; UNKNOWN 6902 0x119DA, // 119DA..119E4; NANDINAGARI 6903 0x119E5, // 119E5..119FF; UNKNOWN 6904 0x11A00, // 
11A00..11A47; ZANABAZAR_SQUARE 6905 0x11A48, // 11A48..11A4F; UNKNOWN 6906 0x11A50, // 11A50..11AA2; SOYOMBO 6907 0x11AA3, // 11AA3..11AAF; UNKNOWN 6908 0x11AB0, // 11AB0..11ABF; CANADIAN_ABORIGINAL 6909 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU 6910 0x11AF9, // 11AF9..11AFF; UNKNOWN 6911 0x11B00, // 11B00..11B09; DEVANAGARI 6912 0x11B0A, // 11B0A..11B5F; UNKNOWN 6913 0x11B60, // 11B60..11B67; SHARADA 6914 0x11B68, // 11B68..11BBF; UNKNOWN 6915 0x11BC0, // 11BC0..11BE1; SUNUWAR 6916 0x11BE2, // 11BE2..11BEF; UNKNOWN 6917 0x11BF0, // 11BF0..11BF9; SUNUWAR 6918 0x11BFA, // 11BFA..11BFF; UNKNOWN 6919 0x11C00, // 11C00..11C08; BHAIKSUKI 6920 0x11C09, // 11C09 ; UNKNOWN 6921 0x11C0A, // 11C0A..11C36; BHAIKSUKI 6922 0x11C37, // 11C37 ; UNKNOWN 6923 0x11C38, // 11C38..11C45; BHAIKSUKI 6924 0x11C46, // 11C46..11C4F; UNKNOWN 6925 0x11C50, // 11C50..11C6C; BHAIKSUKI 6926 0x11C6D, // 11C6D..11C6F; UNKNOWN 6927 0x11C70, // 11C70..11C8F; MARCHEN 6928 0x11C90, // 11C90..11C91; UNKNOWN 6929 0x11C92, // 11C92..11CA7; MARCHEN 6930 0x11CA8, // 11CA8 ; UNKNOWN 6931 0x11CA9, // 11CA9..11CB6; MARCHEN 6932 0x11CB7, // 11CB7..11CFF; UNKNOWN 6933 0x11D00, // 11D00..11D06; MASARAM_GONDI 6934 0x11D07, // 11D07 ; UNKNOWN 6935 0x11D08, // 11D08..11D09; MASARAM_GONDI 6936 0x11D0A, // 11D0A ; UNKNOWN 6937 0x11D0B, // 11D0B..11D36; MASARAM_GONDI 6938 0x11D37, // 11D37..11D39; UNKNOWN 6939 0x11D3A, // 11D3A ; MASARAM_GONDI 6940 0x11D3B, // 11D3B ; UNKNOWN 6941 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI 6942 0x11D3E, // 11D3E ; UNKNOWN 6943 0x11D3F, // 11D3F..11D47; MASARAM_GONDI 6944 0x11D48, // 11D48..11D4F; UNKNOWN 6945 0x11D50, // 11D50..11D59; MASARAM_GONDI 6946 0x11D5A, // 11D5A..11D5F; UNKNOWN 6947 0x11D60, // 11D60..11D65; GUNJALA_GONDI 6948 0x11D66, // 11D66 ; UNKNOWN 6949 0x11D67, // 11D67..11D68; GUNJALA_GONDI 6950 0x11D69, // 11D69 ; UNKNOWN 6951 0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI 6952 0x11D8F, // 11D8F ; UNKNOWN 6953 0x11D90, // 11D90..11D91; GUNJALA_GONDI 6954 0x11D92, // 11D92 ; 
UNKNOWN 6955 0x11D93, // 11D93..11D98; GUNJALA_GONDI 6956 0x11D99, // 11D99..11D9F; UNKNOWN 6957 0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI 6958 0x11DAA, // 11DAA..11DAF; UNKNOWN 6959 0x11DB0, // 11DB0..11DDB; TOLONG_SIKI 6960 0x11DDC, // 11DDC..11DDF; UNKNOWN 6961 0x11DE0, // 11DE0..11DE9; TOLONG_SIKI 6962 0x11DEA, // 11DEA..11EDF; UNKNOWN 6963 0x11EE0, // 11EE0..11EF8; MAKASAR 6964 0x11EF9, // 11EF9..11EFF; UNKNOWN 6965 0x11F00, // 11F00..11F10; KAWI 6966 0x11F11, // 11F11 ; UNKNOWN 6967 0x11F12, // 11F12..11F3A; KAWI 6968 0x11F3B, // 11F3B..11F3D; UNKNOWN 6969 0x11F3E, // 11F3E..11F5A; KAWI 6970 0x11F5B, // 11F5B..11FAF; UNKNOWN 6971 0x11FB0, // 11FB0 ; LISU 6972 0x11FB1, // 11FB1..11FBF; UNKNOWN 6973 0x11FC0, // 11FC0..11FF1; TAMIL 6974 0x11FF2, // 11FF2..11FFE; UNKNOWN 6975 0x11FFF, // 11FFF ; TAMIL 6976 0x12000, // 12000..12399; CUNEIFORM 6977 0x1239A, // 1239A..123FF; UNKNOWN 6978 0x12400, // 12400..1246E; CUNEIFORM 6979 0x1246F, // 1246F ; UNKNOWN 6980 0x12470, // 12470..12474; CUNEIFORM 6981 0x12475, // 12475..1247F; UNKNOWN 6982 0x12480, // 12480..12543; CUNEIFORM 6983 0x12544, // 12544..12F8F; UNKNOWN 6984 0x12F90, // 12F90..12FF2; CYPRO_MINOAN 6985 0x12FF3, // 12FF3..12FFF; UNKNOWN 6986 0x13000, // 13000..13455; EGYPTIAN_HIEROGLYPHS 6987 0x13456, // 13456..1345F; UNKNOWN 6988 0x13460, // 13460..143FA; EGYPTIAN_HIEROGLYPHS 6989 0x143FB, // 143FB..143FF; UNKNOWN 6990 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS 6991 0x14647, // 14647..160FF; UNKNOWN 6992 0x16100, // 16100..16139; GURUNG_KHEMA 6993 0x1613A, // 1613A..167FF; UNKNOWN 6994 0x16800, // 16800..16A38; BAMUM 6995 0x16A39, // 16A39..16A3F; UNKNOWN 6996 0x16A40, // 16A40..16A5E; MRO 6997 0x16A5F, // 16A5F ; UNKNOWN 6998 0x16A60, // 16A60..16A69; MRO 6999 0x16A6A, // 16A6A..16A6D; UNKNOWN 7000 0x16A6E, // 16A6E..16A6F; MRO 7001 0x16A70, // 16A70..16ABE; TANGSA 7002 0x16ABF, // 16ABF ; UNKNOWN 7003 0x16AC0, // 16AC0..16AC9; TANGSA 7004 0x16ACA, // 16ACA..16ACF; UNKNOWN 7005 0x16AD0, // 16AD0..16AED; 
BASSA_VAH 7006 0x16AEE, // 16AEE..16AEF; UNKNOWN 7007 0x16AF0, // 16AF0..16AF5; BASSA_VAH 7008 0x16AF6, // 16AF6..16AFF; UNKNOWN 7009 0x16B00, // 16B00..16B45; PAHAWH_HMONG 7010 0x16B46, // 16B46..16B4F; UNKNOWN 7011 0x16B50, // 16B50..16B59; PAHAWH_HMONG 7012 0x16B5A, // 16B5A ; UNKNOWN 7013 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG 7014 0x16B62, // 16B62 ; UNKNOWN 7015 0x16B63, // 16B63..16B77; PAHAWH_HMONG 7016 0x16B78, // 16B78..16B7C; UNKNOWN 7017 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG 7018 0x16B90, // 16B90..16D3F; UNKNOWN 7019 0x16D40, // 16D40..16D79; KIRAT_RAI 7020 0x16D7A, // 16D7A..16E3F; UNKNOWN 7021 0x16E40, // 16E40..16E9A; MEDEFAIDRIN 7022 0x16E9B, // 16E9B..16E9F; UNKNOWN 7023 0x16EA0, // 16EA0..16EB8; BERIA_ERFE 7024 0x16EB9, // 16EB9..16EBA; UNKNOWN 7025 0x16EBB, // 16EBB..16ED3; BERIA_ERFE 7026 0x16ED4, // 16ED4..16EFF; UNKNOWN 7027 0x16F00, // 16F00..16F4A; MIAO 7028 0x16F4B, // 16F4B..16F4E; UNKNOWN 7029 0x16F4F, // 16F4F..16F87; MIAO 7030 0x16F88, // 16F88..16F8E; UNKNOWN 7031 0x16F8F, // 16F8F..16F9F; MIAO 7032 0x16FA0, // 16FA0..16FDF; UNKNOWN 7033 0x16FE0, // 16FE0 ; TANGUT 7034 0x16FE1, // 16FE1 ; NUSHU 7035 0x16FE2, // 16FE2..16FE3; HAN 7036 0x16FE4, // 16FE4 ; KHITAN_SMALL_SCRIPT 7037 0x16FE5, // 16FE5..16FEF; UNKNOWN 7038 0x16FF0, // 16FF0..16FF6; HAN 7039 0x16FF7, // 16FF7..16FFF; UNKNOWN 7040 0x17000, // 17000..18AFF; TANGUT 7041 0x18B00, // 18B00..18CD5; KHITAN_SMALL_SCRIPT 7042 0x18CD6, // 18CD6..18CFE; UNKNOWN 7043 0x18CFF, // 18CFF ; KHITAN_SMALL_SCRIPT 7044 0x18D00, // 18D00..18D1E; TANGUT 7045 0x18D1F, // 18D1F..18D7F; UNKNOWN 7046 0x18D80, // 18D80..18DF2; TANGUT 7047 0x18DF3, // 18DF3..1AFEF; UNKNOWN 7048 0x1AFF0, // 1AFF0..1AFF3; KATAKANA 7049 0x1AFF4, // 1AFF4 ; UNKNOWN 7050 0x1AFF5, // 1AFF5..1AFFB; KATAKANA 7051 0x1AFFC, // 1AFFC ; UNKNOWN 7052 0x1AFFD, // 1AFFD..1AFFE; KATAKANA 7053 0x1AFFF, // 1AFFF ; UNKNOWN 7054 0x1B000, // 1B000 ; KATAKANA 7055 0x1B001, // 1B001..1B11F; HIRAGANA 7056 0x1B120, // 1B120..1B122; KATAKANA 
7057 0x1B123, // 1B123..1B131; UNKNOWN 7058 0x1B132, // 1B132 ; HIRAGANA 7059 0x1B133, // 1B133..1B14F; UNKNOWN 7060 0x1B150, // 1B150..1B152; HIRAGANA 7061 0x1B153, // 1B153..1B154; UNKNOWN 7062 0x1B155, // 1B155 ; KATAKANA 7063 0x1B156, // 1B156..1B163; UNKNOWN 7064 0x1B164, // 1B164..1B167; KATAKANA 7065 0x1B168, // 1B168..1B16F; UNKNOWN 7066 0x1B170, // 1B170..1B2FB; NUSHU 7067 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN 7068 0x1BC00, // 1BC00..1BC6A; DUPLOYAN 7069 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN 7070 0x1BC70, // 1BC70..1BC7C; DUPLOYAN 7071 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN 7072 0x1BC80, // 1BC80..1BC88; DUPLOYAN 7073 0x1BC89, // 1BC89..1BC8F; UNKNOWN 7074 0x1BC90, // 1BC90..1BC99; DUPLOYAN 7075 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN 7076 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN 7077 0x1BCA0, // 1BCA0..1BCA3; COMMON 7078 0x1BCA4, // 1BCA4..1CBFF; UNKNOWN 7079 0x1CC00, // 1CC00..1CCFC; COMMON 7080 0x1CCFD, // 1CCFD..1CCFF; UNKNOWN 7081 0x1CD00, // 1CD00..1CEB3; COMMON 7082 0x1CEB4, // 1CEB4..1CEB9; UNKNOWN 7083 0x1CEBA, // 1CEBA..1CED0; COMMON 7084 0x1CED1, // 1CED1..1CEDF; UNKNOWN 7085 0x1CEE0, // 1CEE0..1CEF0; COMMON 7086 0x1CEF1, // 1CEF1..1CEFF; UNKNOWN 7087 0x1CF00, // 1CF00..1CF2D; INHERITED 7088 0x1CF2E, // 1CF2E..1CF2F; UNKNOWN 7089 0x1CF30, // 1CF30..1CF46; INHERITED 7090 0x1CF47, // 1CF47..1CF4F; UNKNOWN 7091 0x1CF50, // 1CF50..1CFC3; COMMON 7092 0x1CFC4, // 1CFC4..1CFFF; UNKNOWN 7093 0x1D000, // 1D000..1D0F5; COMMON 7094 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN 7095 0x1D100, // 1D100..1D126; COMMON 7096 0x1D127, // 1D127..1D128; UNKNOWN 7097 0x1D129, // 1D129..1D166; COMMON 7098 0x1D167, // 1D167..1D169; INHERITED 7099 0x1D16A, // 1D16A..1D17A; COMMON 7100 0x1D17B, // 1D17B..1D182; INHERITED 7101 0x1D183, // 1D183..1D184; COMMON 7102 0x1D185, // 1D185..1D18B; INHERITED 7103 0x1D18C, // 1D18C..1D1A9; COMMON 7104 0x1D1AA, // 1D1AA..1D1AD; INHERITED 7105 0x1D1AE, // 1D1AE..1D1EA; COMMON 7106 0x1D1EB, // 1D1EB..1D1FF; UNKNOWN 7107 0x1D200, // 1D200..1D245; GREEK 7108 0x1D246, // 
1D246..1D2BF; UNKNOWN 7109 0x1D2C0, // 1D2C0..1D2D3; COMMON 7110 0x1D2D4, // 1D2D4..1D2DF; UNKNOWN 7111 0x1D2E0, // 1D2E0..1D2F3; COMMON 7112 0x1D2F4, // 1D2F4..1D2FF; UNKNOWN 7113 0x1D300, // 1D300..1D356; COMMON 7114 0x1D357, // 1D357..1D35F; UNKNOWN 7115 0x1D360, // 1D360..1D378; COMMON 7116 0x1D379, // 1D379..1D3FF; UNKNOWN 7117 0x1D400, // 1D400..1D454; COMMON 7118 0x1D455, // 1D455 ; UNKNOWN 7119 0x1D456, // 1D456..1D49C; COMMON 7120 0x1D49D, // 1D49D ; UNKNOWN 7121 0x1D49E, // 1D49E..1D49F; COMMON 7122 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN 7123 0x1D4A2, // 1D4A2 ; COMMON 7124 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN 7125 0x1D4A5, // 1D4A5..1D4A6; COMMON 7126 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN 7127 0x1D4A9, // 1D4A9..1D4AC; COMMON 7128 0x1D4AD, // 1D4AD ; UNKNOWN 7129 0x1D4AE, // 1D4AE..1D4B9; COMMON 7130 0x1D4BA, // 1D4BA ; UNKNOWN 7131 0x1D4BB, // 1D4BB ; COMMON 7132 0x1D4BC, // 1D4BC ; UNKNOWN 7133 0x1D4BD, // 1D4BD..1D4C3; COMMON 7134 0x1D4C4, // 1D4C4 ; UNKNOWN 7135 0x1D4C5, // 1D4C5..1D505; COMMON 7136 0x1D506, // 1D506 ; UNKNOWN 7137 0x1D507, // 1D507..1D50A; COMMON 7138 0x1D50B, // 1D50B..1D50C; UNKNOWN 7139 0x1D50D, // 1D50D..1D514; COMMON 7140 0x1D515, // 1D515 ; UNKNOWN 7141 0x1D516, // 1D516..1D51C; COMMON 7142 0x1D51D, // 1D51D ; UNKNOWN 7143 0x1D51E, // 1D51E..1D539; COMMON 7144 0x1D53A, // 1D53A ; UNKNOWN 7145 0x1D53B, // 1D53B..1D53E; COMMON 7146 0x1D53F, // 1D53F ; UNKNOWN 7147 0x1D540, // 1D540..1D544; COMMON 7148 0x1D545, // 1D545 ; UNKNOWN 7149 0x1D546, // 1D546 ; COMMON 7150 0x1D547, // 1D547..1D549; UNKNOWN 7151 0x1D54A, // 1D54A..1D550; COMMON 7152 0x1D551, // 1D551 ; UNKNOWN 7153 0x1D552, // 1D552..1D6A5; COMMON 7154 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN 7155 0x1D6A8, // 1D6A8..1D7CB; COMMON 7156 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN 7157 0x1D7CE, // 1D7CE..1D7FF; COMMON 7158 0x1D800, // 1D800..1DA8B; SIGNWRITING 7159 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN 7160 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING 7161 0x1DAA0, // 1DAA0 ; UNKNOWN 7162 0x1DAA1, // 
1DAA1..1DAAF; SIGNWRITING 7163 0x1DAB0, // 1DAB0..1DEFF; UNKNOWN 7164 0x1DF00, // 1DF00..1DF1E; LATIN 7165 0x1DF1F, // 1DF1F..1DF24; UNKNOWN 7166 0x1DF25, // 1DF25..1DF2A; LATIN 7167 0x1DF2B, // 1DF2B..1DFFF; UNKNOWN 7168 0x1E000, // 1E000..1E006; GLAGOLITIC 7169 0x1E007, // 1E007 ; UNKNOWN 7170 0x1E008, // 1E008..1E018; GLAGOLITIC 7171 0x1E019, // 1E019..1E01A; UNKNOWN 7172 0x1E01B, // 1E01B..1E021; GLAGOLITIC 7173 0x1E022, // 1E022 ; UNKNOWN 7174 0x1E023, // 1E023..1E024; GLAGOLITIC 7175 0x1E025, // 1E025 ; UNKNOWN 7176 0x1E026, // 1E026..1E02A; GLAGOLITIC 7177 0x1E02B, // 1E02B..1E02F; UNKNOWN 7178 0x1E030, // 1E030..1E06D; CYRILLIC 7179 0x1E06E, // 1E06E..1E08E; UNKNOWN 7180 0x1E08F, // 1E08F ; CYRILLIC 7181 0x1E090, // 1E090..1E0FF; UNKNOWN 7182 0x1E100, // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG 7183 0x1E12D, // 1E12D..1E12F; UNKNOWN 7184 0x1E130, // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG 7185 0x1E13E, // 1E13E..1E13F; UNKNOWN 7186 0x1E140, // 1E140..1E149; NYIAKENG_PUACHUE_HMONG 7187 0x1E14A, // 1E14A..1E14D; UNKNOWN 7188 0x1E14E, // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG 7189 0x1E150, // 1E150..1E28F; UNKNOWN 7190 0x1E290, // 1E290..1E2AE; TOTO 7191 0x1E2AF, // 1E2AF..1E2BF; UNKNOWN 7192 0x1E2C0, // 1E2C0..1E2F9; WANCHO 7193 0x1E2FA, // 1E2FA..1E2FE; UNKNOWN 7194 0x1E2FF, // 1E2FF ; WANCHO 7195 0x1E300, // 1E300..1E4CF; UNKNOWN 7196 0x1E4D0, // 1E4D0..1E4F9; NAG_MUNDARI 7197 0x1E4FA, // 1E4FA..1E5CF; UNKNOWN 7198 0x1E5D0, // 1E5D0..1E5FA; OL_ONAL 7199 0x1E5FB, // 1E5FB..1E5FE; UNKNOWN 7200 0x1E5FF, // 1E5FF ; OL_ONAL 7201 0x1E600, // 1E600..1E6BF; UNKNOWN 7202 0x1E6C0, // 1E6C0..1E6DE; TAI_YO 7203 0x1E6DF, // 1E6DF ; UNKNOWN 7204 0x1E6E0, // 1E6E0..1E6F5; TAI_YO 7205 0x1E6F6, // 1E6F6..1E6FD; UNKNOWN 7206 0x1E6FE, // 1E6FE..1E6FF; TAI_YO 7207 0x1E700, // 1E700..1E7DF; UNKNOWN 7208 0x1E7E0, // 1E7E0..1E7E6; ETHIOPIC 7209 0x1E7E7, // 1E7E7 ; UNKNOWN 7210 0x1E7E8, // 1E7E8..1E7EB; ETHIOPIC 7211 0x1E7EC, // 1E7EC ; UNKNOWN 7212 0x1E7ED, // 1E7ED..1E7EE; ETHIOPIC 7213 
0x1E7EF, // 1E7EF ; UNKNOWN 7214 0x1E7F0, // 1E7F0..1E7FE; ETHIOPIC 7215 0x1E7FF, // 1E7FF ; UNKNOWN 7216 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI 7217 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN 7218 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI 7219 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN 7220 0x1E900, // 1E900..1E94B; ADLAM 7221 0x1E94C, // 1E94C..1E94F; UNKNOWN 7222 0x1E950, // 1E950..1E959; ADLAM 7223 0x1E95A, // 1E95A..1E95D; UNKNOWN 7224 0x1E95E, // 1E95E..1E95F; ADLAM 7225 0x1E960, // 1E960..1EC70; UNKNOWN 7226 0x1EC71, // 1EC71..1ECB4; COMMON 7227 0x1ECB5, // 1ECB5..1ED00; UNKNOWN 7228 0x1ED01, // 1ED01..1ED3D; COMMON 7229 0x1ED3E, // 1ED3E..1EDFF; UNKNOWN 7230 0x1EE00, // 1EE00..1EE03; ARABIC 7231 0x1EE04, // 1EE04 ; UNKNOWN 7232 0x1EE05, // 1EE05..1EE1F; ARABIC 7233 0x1EE20, // 1EE20 ; UNKNOWN 7234 0x1EE21, // 1EE21..1EE22; ARABIC 7235 0x1EE23, // 1EE23 ; UNKNOWN 7236 0x1EE24, // 1EE24 ; ARABIC 7237 0x1EE25, // 1EE25..1EE26; UNKNOWN 7238 0x1EE27, // 1EE27 ; ARABIC 7239 0x1EE28, // 1EE28 ; UNKNOWN 7240 0x1EE29, // 1EE29..1EE32; ARABIC 7241 0x1EE33, // 1EE33 ; UNKNOWN 7242 0x1EE34, // 1EE34..1EE37; ARABIC 7243 0x1EE38, // 1EE38 ; UNKNOWN 7244 0x1EE39, // 1EE39 ; ARABIC 7245 0x1EE3A, // 1EE3A ; UNKNOWN 7246 0x1EE3B, // 1EE3B ; ARABIC 7247 0x1EE3C, // 1EE3C..1EE41; UNKNOWN 7248 0x1EE42, // 1EE42 ; ARABIC 7249 0x1EE43, // 1EE43..1EE46; UNKNOWN 7250 0x1EE47, // 1EE47 ; ARABIC 7251 0x1EE48, // 1EE48 ; UNKNOWN 7252 0x1EE49, // 1EE49 ; ARABIC 7253 0x1EE4A, // 1EE4A ; UNKNOWN 7254 0x1EE4B, // 1EE4B ; ARABIC 7255 0x1EE4C, // 1EE4C ; UNKNOWN 7256 0x1EE4D, // 1EE4D..1EE4F; ARABIC 7257 0x1EE50, // 1EE50 ; UNKNOWN 7258 0x1EE51, // 1EE51..1EE52; ARABIC 7259 0x1EE53, // 1EE53 ; UNKNOWN 7260 0x1EE54, // 1EE54 ; ARABIC 7261 0x1EE55, // 1EE55..1EE56; UNKNOWN 7262 0x1EE57, // 1EE57 ; ARABIC 7263 0x1EE58, // 1EE58 ; UNKNOWN 7264 0x1EE59, // 1EE59 ; ARABIC 7265 0x1EE5A, // 1EE5A ; UNKNOWN 7266 0x1EE5B, // 1EE5B ; ARABIC 7267 0x1EE5C, // 1EE5C ; UNKNOWN 7268 0x1EE5D, // 1EE5D ; ARABIC 7269 0x1EE5E, 
// 1EE5E ; UNKNOWN 7270 0x1EE5F, // 1EE5F ; ARABIC 7271 0x1EE60, // 1EE60 ; UNKNOWN 7272 0x1EE61, // 1EE61..1EE62; ARABIC 7273 0x1EE63, // 1EE63 ; UNKNOWN 7274 0x1EE64, // 1EE64 ; ARABIC 7275 0x1EE65, // 1EE65..1EE66; UNKNOWN 7276 0x1EE67, // 1EE67..1EE6A; ARABIC 7277 0x1EE6B, // 1EE6B ; UNKNOWN 7278 0x1EE6C, // 1EE6C..1EE72; ARABIC 7279 0x1EE73, // 1EE73 ; UNKNOWN 7280 0x1EE74, // 1EE74..1EE77; ARABIC 7281 0x1EE78, // 1EE78 ; UNKNOWN 7282 0x1EE79, // 1EE79..1EE7C; ARABIC 7283 0x1EE7D, // 1EE7D ; UNKNOWN 7284 0x1EE7E, // 1EE7E ; ARABIC 7285 0x1EE7F, // 1EE7F ; UNKNOWN 7286 0x1EE80, // 1EE80..1EE89; ARABIC 7287 0x1EE8A, // 1EE8A ; UNKNOWN 7288 0x1EE8B, // 1EE8B..1EE9B; ARABIC 7289 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN 7290 0x1EEA1, // 1EEA1..1EEA3; ARABIC 7291 0x1EEA4, // 1EEA4 ; UNKNOWN 7292 0x1EEA5, // 1EEA5..1EEA9; ARABIC 7293 0x1EEAA, // 1EEAA ; UNKNOWN 7294 0x1EEAB, // 1EEAB..1EEBB; ARABIC 7295 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN 7296 0x1EEF0, // 1EEF0..1EEF1; ARABIC 7297 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN 7298 0x1F000, // 1F000..1F02B; COMMON 7299 0x1F02C, // 1F02C..1F02F; UNKNOWN 7300 0x1F030, // 1F030..1F093; COMMON 7301 0x1F094, // 1F094..1F09F; UNKNOWN 7302 0x1F0A0, // 1F0A0..1F0AE; COMMON 7303 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN 7304 0x1F0B1, // 1F0B1..1F0BF; COMMON 7305 0x1F0C0, // 1F0C0 ; UNKNOWN 7306 0x1F0C1, // 1F0C1..1F0CF; COMMON 7307 0x1F0D0, // 1F0D0 ; UNKNOWN 7308 0x1F0D1, // 1F0D1..1F0F5; COMMON 7309 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN 7310 0x1F100, // 1F100..1F1AD; COMMON 7311 0x1F1AE, // 1F1AE..1F1E5; UNKNOWN 7312 0x1F1E6, // 1F1E6..1F1FF; COMMON 7313 0x1F200, // 1F200 ; HIRAGANA 7314 0x1F201, // 1F201..1F202; COMMON 7315 0x1F203, // 1F203..1F20F; UNKNOWN 7316 0x1F210, // 1F210..1F23B; COMMON 7317 0x1F23C, // 1F23C..1F23F; UNKNOWN 7318 0x1F240, // 1F240..1F248; COMMON 7319 0x1F249, // 1F249..1F24F; UNKNOWN 7320 0x1F250, // 1F250..1F251; COMMON 7321 0x1F252, // 1F252..1F25F; UNKNOWN 7322 0x1F260, // 1F260..1F265; COMMON 7323 0x1F266, // 1F266..1F2FF; 
UNKNOWN 7324 0x1F300, // 1F300..1F6D8; COMMON 7325 0x1F6D9, // 1F6D9..1F6DB; UNKNOWN 7326 0x1F6DC, // 1F6DC..1F6EC; COMMON 7327 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN 7328 0x1F6F0, // 1F6F0..1F6FC; COMMON 7329 0x1F6FD, // 1F6FD..1F6FF; UNKNOWN 7330 0x1F700, // 1F700..1F7D9; COMMON 7331 0x1F7DA, // 1F7DA..1F7DF; UNKNOWN 7332 0x1F7E0, // 1F7E0..1F7EB; COMMON 7333 0x1F7EC, // 1F7EC..1F7EF; UNKNOWN 7334 0x1F7F0, // 1F7F0 ; COMMON 7335 0x1F7F1, // 1F7F1..1F7FF; UNKNOWN 7336 0x1F800, // 1F800..1F80B; COMMON 7337 0x1F80C, // 1F80C..1F80F; UNKNOWN 7338 0x1F810, // 1F810..1F847; COMMON 7339 0x1F848, // 1F848..1F84F; UNKNOWN 7340 0x1F850, // 1F850..1F859; COMMON 7341 0x1F85A, // 1F85A..1F85F; UNKNOWN 7342 0x1F860, // 1F860..1F887; COMMON 7343 0x1F888, // 1F888..1F88F; UNKNOWN 7344 0x1F890, // 1F890..1F8AD; COMMON 7345 0x1F8AE, // 1F8AE..1F8AF; UNKNOWN 7346 0x1F8B0, // 1F8B0..1F8BB; COMMON 7347 0x1F8BC, // 1F8BC..1F8BF; UNKNOWN 7348 0x1F8C0, // 1F8C0..1F8C1; COMMON 7349 0x1F8C2, // 1F8C2..1F8CF; UNKNOWN 7350 0x1F8D0, // 1F8D0..1F8D8; COMMON 7351 0x1F8D9, // 1F8D9..1F8FF; UNKNOWN 7352 0x1F900, // 1F900..1FA57; COMMON 7353 0x1FA58, // 1FA58..1FA5F; UNKNOWN 7354 0x1FA60, // 1FA60..1FA6D; COMMON 7355 0x1FA6E, // 1FA6E..1FA6F; UNKNOWN 7356 0x1FA70, // 1FA70..1FA7C; COMMON 7357 0x1FA7D, // 1FA7D..1FA7F; UNKNOWN 7358 0x1FA80, // 1FA80..1FA8A; COMMON 7359 0x1FA8B, // 1FA8B..1FA8D; UNKNOWN 7360 0x1FA8E, // 1FA8E..1FAC6; COMMON 7361 0x1FAC7, // 1FAC7 ; UNKNOWN 7362 0x1FAC8, // 1FAC8 ; COMMON 7363 0x1FAC9, // 1FAC9..1FACC; UNKNOWN 7364 0x1FACD, // 1FACD..1FADC; COMMON 7365 0x1FADD, // 1FADD..1FADE; UNKNOWN 7366 0x1FADF, // 1FADF..1FAEA; COMMON 7367 0x1FAEB, // 1FAEB..1FAEE; UNKNOWN 7368 0x1FAEF, // 1FAEF..1FAF8; COMMON 7369 0x1FAF9, // 1FAF9..1FAFF; UNKNOWN 7370 0x1FB00, // 1FB00..1FB92; COMMON 7371 0x1FB93, // 1FB93 ; UNKNOWN 7372 0x1FB94, // 1FB94..1FBFA; COMMON 7373 0x1FBFB, // 1FBFB..1FFFF; UNKNOWN 7374 0x20000, // 20000..2A6DF; HAN 7375 0x2A6E0, // 2A6E0..2A6FF; UNKNOWN 7376 0x2A700, // 
2A700..2B81D; HAN 7377 0x2B81E, // 2B81E..2B81F; UNKNOWN 7378 0x2B820, // 2B820..2CEAD; HAN 7379 0x2CEAE, // 2CEAE..2CEAF; UNKNOWN 7380 0x2CEB0, // 2CEB0..2EBE0; HAN 7381 0x2EBE1, // 2EBE1..2EBEF; UNKNOWN 7382 0x2EBF0, // 2EBF0..2EE5D; HAN 7383 0x2EE5E, // 2EE5E..2F7FF; UNKNOWN 7384 0x2F800, // 2F800..2FA1D; HAN 7385 0x2FA1E, // 2FA1E..2FFFF; UNKNOWN 7386 0x30000, // 30000..3134A; HAN 7387 0x3134B, // 3134B..3134F; UNKNOWN 7388 0x31350, // 31350..33479; HAN 7389 0x3347A, // 3347A..E0000; UNKNOWN 7390 0xE0001, // E0001 ; COMMON 7391 0xE0002, // E0002..E001F; UNKNOWN 7392 0xE0020, // E0020..E007F; COMMON 7393 0xE0080, // E0080..E00FF; UNKNOWN 7394 0xE0100, // E0100..E01EF; INHERITED 7395 0xE01F0, // E01F0..10FFFF; UNKNOWN 7396 }; 7397 7398 private static final UnicodeScript[] scripts = { 7399 COMMON, // 0000..0040 7400 LATIN, // 0041..005A 7401 COMMON, // 005B..0060 7402 LATIN, // 0061..007A 7403 COMMON, // 007B..00A9 7404 LATIN, // 00AA 7405 COMMON, // 00AB..00B9 7406 LATIN, // 00BA 7407 COMMON, // 00BB..00BF 7408 LATIN, // 00C0..00D6 7409 COMMON, // 00D7 7410 LATIN, // 00D8..00F6 7411 COMMON, // 00F7 7412 LATIN, // 00F8..02B8 7413 COMMON, // 02B9..02DF 7414 LATIN, // 02E0..02E4 7415 COMMON, // 02E5..02E9 7416 BOPOMOFO, // 02EA..02EB 7417 COMMON, // 02EC..02FF 7418 INHERITED, // 0300..036F 7419 GREEK, // 0370..0373 7420 COMMON, // 0374 7421 GREEK, // 0375..0377 7422 UNKNOWN, // 0378..0379 7423 GREEK, // 037A..037D 7424 COMMON, // 037E 7425 GREEK, // 037F 7426 UNKNOWN, // 0380..0383 7427 GREEK, // 0384 7428 COMMON, // 0385 7429 GREEK, // 0386 7430 COMMON, // 0387 7431 GREEK, // 0388..038A 7432 UNKNOWN, // 038B 7433 GREEK, // 038C 7434 UNKNOWN, // 038D 7435 GREEK, // 038E..03A1 7436 UNKNOWN, // 03A2 7437 GREEK, // 03A3..03E1 7438 COPTIC, // 03E2..03EF 7439 GREEK, // 03F0..03FF 7440 CYRILLIC, // 0400..0484 7441 INHERITED, // 0485..0486 7442 CYRILLIC, // 0487..052F 7443 UNKNOWN, // 0530 7444 ARMENIAN, // 0531..0556 7445 UNKNOWN, // 0557..0558 7446 ARMENIAN, // 
0559..058A 7447 UNKNOWN, // 058B..058C 7448 ARMENIAN, // 058D..058F 7449 UNKNOWN, // 0590 7450 HEBREW, // 0591..05C7 7451 UNKNOWN, // 05C8..05CF 7452 HEBREW, // 05D0..05EA 7453 UNKNOWN, // 05EB..05EE 7454 HEBREW, // 05EF..05F4 7455 UNKNOWN, // 05F5..05FF 7456 ARABIC, // 0600..0604 7457 COMMON, // 0605 7458 ARABIC, // 0606..060B 7459 COMMON, // 060C 7460 ARABIC, // 060D..061A 7461 COMMON, // 061B 7462 ARABIC, // 061C..061E 7463 COMMON, // 061F 7464 ARABIC, // 0620..063F 7465 COMMON, // 0640 7466 ARABIC, // 0641..064A 7467 INHERITED, // 064B..0655 7468 ARABIC, // 0656..066F 7469 INHERITED, // 0670 7470 ARABIC, // 0671..06DC 7471 COMMON, // 06DD 7472 ARABIC, // 06DE..06FF 7473 SYRIAC, // 0700..070D 7474 UNKNOWN, // 070E 7475 SYRIAC, // 070F..074A 7476 UNKNOWN, // 074B..074C 7477 SYRIAC, // 074D..074F 7478 ARABIC, // 0750..077F 7479 THAANA, // 0780..07B1 7480 UNKNOWN, // 07B2..07BF 7481 NKO, // 07C0..07FA 7482 UNKNOWN, // 07FB..07FC 7483 NKO, // 07FD..07FF 7484 SAMARITAN, // 0800..082D 7485 UNKNOWN, // 082E..082F 7486 SAMARITAN, // 0830..083E 7487 UNKNOWN, // 083F 7488 MANDAIC, // 0840..085B 7489 UNKNOWN, // 085C..085D 7490 MANDAIC, // 085E 7491 UNKNOWN, // 085F 7492 SYRIAC, // 0860..086A 7493 UNKNOWN, // 086B..086F 7494 ARABIC, // 0870..0891 7495 UNKNOWN, // 0892..0896 7496 ARABIC, // 0897..08E1 7497 COMMON, // 08E2 7498 ARABIC, // 08E3..08FF 7499 DEVANAGARI, // 0900..0950 7500 INHERITED, // 0951..0954 7501 DEVANAGARI, // 0955..0963 7502 COMMON, // 0964..0965 7503 DEVANAGARI, // 0966..097F 7504 BENGALI, // 0980..0983 7505 UNKNOWN, // 0984 7506 BENGALI, // 0985..098C 7507 UNKNOWN, // 098D..098E 7508 BENGALI, // 098F..0990 7509 UNKNOWN, // 0991..0992 7510 BENGALI, // 0993..09A8 7511 UNKNOWN, // 09A9 7512 BENGALI, // 09AA..09B0 7513 UNKNOWN, // 09B1 7514 BENGALI, // 09B2 7515 UNKNOWN, // 09B3..09B5 7516 BENGALI, // 09B6..09B9 7517 UNKNOWN, // 09BA..09BB 7518 BENGALI, // 09BC..09C4 7519 UNKNOWN, // 09C5..09C6 7520 BENGALI, // 09C7..09C8 7521 UNKNOWN, // 09C9..09CA 7522 
BENGALI, // 09CB..09CE 7523 UNKNOWN, // 09CF..09D6 7524 BENGALI, // 09D7 7525 UNKNOWN, // 09D8..09DB 7526 BENGALI, // 09DC..09DD 7527 UNKNOWN, // 09DE 7528 BENGALI, // 09DF..09E3 7529 UNKNOWN, // 09E4..09E5 7530 BENGALI, // 09E6..09FE 7531 UNKNOWN, // 09FF..0A00 7532 GURMUKHI, // 0A01..0A03 7533 UNKNOWN, // 0A04 7534 GURMUKHI, // 0A05..0A0A 7535 UNKNOWN, // 0A0B..0A0E 7536 GURMUKHI, // 0A0F..0A10 7537 UNKNOWN, // 0A11..0A12 7538 GURMUKHI, // 0A13..0A28 7539 UNKNOWN, // 0A29 7540 GURMUKHI, // 0A2A..0A30 7541 UNKNOWN, // 0A31 7542 GURMUKHI, // 0A32..0A33 7543 UNKNOWN, // 0A34 7544 GURMUKHI, // 0A35..0A36 7545 UNKNOWN, // 0A37 7546 GURMUKHI, // 0A38..0A39 7547 UNKNOWN, // 0A3A..0A3B 7548 GURMUKHI, // 0A3C 7549 UNKNOWN, // 0A3D 7550 GURMUKHI, // 0A3E..0A42 7551 UNKNOWN, // 0A43..0A46 7552 GURMUKHI, // 0A47..0A48 7553 UNKNOWN, // 0A49..0A4A 7554 GURMUKHI, // 0A4B..0A4D 7555 UNKNOWN, // 0A4E..0A50 7556 GURMUKHI, // 0A51 7557 UNKNOWN, // 0A52..0A58 7558 GURMUKHI, // 0A59..0A5C 7559 UNKNOWN, // 0A5D 7560 GURMUKHI, // 0A5E 7561 UNKNOWN, // 0A5F..0A65 7562 GURMUKHI, // 0A66..0A76 7563 UNKNOWN, // 0A77..0A80 7564 GUJARATI, // 0A81..0A83 7565 UNKNOWN, // 0A84 7566 GUJARATI, // 0A85..0A8D 7567 UNKNOWN, // 0A8E 7568 GUJARATI, // 0A8F..0A91 7569 UNKNOWN, // 0A92 7570 GUJARATI, // 0A93..0AA8 7571 UNKNOWN, // 0AA9 7572 GUJARATI, // 0AAA..0AB0 7573 UNKNOWN, // 0AB1 7574 GUJARATI, // 0AB2..0AB3 7575 UNKNOWN, // 0AB4 7576 GUJARATI, // 0AB5..0AB9 7577 UNKNOWN, // 0ABA..0ABB 7578 GUJARATI, // 0ABC..0AC5 7579 UNKNOWN, // 0AC6 7580 GUJARATI, // 0AC7..0AC9 7581 UNKNOWN, // 0ACA 7582 GUJARATI, // 0ACB..0ACD 7583 UNKNOWN, // 0ACE..0ACF 7584 GUJARATI, // 0AD0 7585 UNKNOWN, // 0AD1..0ADF 7586 GUJARATI, // 0AE0..0AE3 7587 UNKNOWN, // 0AE4..0AE5 7588 GUJARATI, // 0AE6..0AF1 7589 UNKNOWN, // 0AF2..0AF8 7590 GUJARATI, // 0AF9..0AFF 7591 UNKNOWN, // 0B00 7592 ORIYA, // 0B01..0B03 7593 UNKNOWN, // 0B04 7594 ORIYA, // 0B05..0B0C 7595 UNKNOWN, // 0B0D..0B0E 7596 ORIYA, // 0B0F..0B10 7597 UNKNOWN, // 
0B11..0B12 7598 ORIYA, // 0B13..0B28 7599 UNKNOWN, // 0B29 7600 ORIYA, // 0B2A..0B30 7601 UNKNOWN, // 0B31 7602 ORIYA, // 0B32..0B33 7603 UNKNOWN, // 0B34 7604 ORIYA, // 0B35..0B39 7605 UNKNOWN, // 0B3A..0B3B 7606 ORIYA, // 0B3C..0B44 7607 UNKNOWN, // 0B45..0B46 7608 ORIYA, // 0B47..0B48 7609 UNKNOWN, // 0B49..0B4A 7610 ORIYA, // 0B4B..0B4D 7611 UNKNOWN, // 0B4E..0B54 7612 ORIYA, // 0B55..0B57 7613 UNKNOWN, // 0B58..0B5B 7614 ORIYA, // 0B5C..0B5D 7615 UNKNOWN, // 0B5E 7616 ORIYA, // 0B5F..0B63 7617 UNKNOWN, // 0B64..0B65 7618 ORIYA, // 0B66..0B77 7619 UNKNOWN, // 0B78..0B81 7620 TAMIL, // 0B82..0B83 7621 UNKNOWN, // 0B84 7622 TAMIL, // 0B85..0B8A 7623 UNKNOWN, // 0B8B..0B8D 7624 TAMIL, // 0B8E..0B90 7625 UNKNOWN, // 0B91 7626 TAMIL, // 0B92..0B95 7627 UNKNOWN, // 0B96..0B98 7628 TAMIL, // 0B99..0B9A 7629 UNKNOWN, // 0B9B 7630 TAMIL, // 0B9C 7631 UNKNOWN, // 0B9D 7632 TAMIL, // 0B9E..0B9F 7633 UNKNOWN, // 0BA0..0BA2 7634 TAMIL, // 0BA3..0BA4 7635 UNKNOWN, // 0BA5..0BA7 7636 TAMIL, // 0BA8..0BAA 7637 UNKNOWN, // 0BAB..0BAD 7638 TAMIL, // 0BAE..0BB9 7639 UNKNOWN, // 0BBA..0BBD 7640 TAMIL, // 0BBE..0BC2 7641 UNKNOWN, // 0BC3..0BC5 7642 TAMIL, // 0BC6..0BC8 7643 UNKNOWN, // 0BC9 7644 TAMIL, // 0BCA..0BCD 7645 UNKNOWN, // 0BCE..0BCF 7646 TAMIL, // 0BD0 7647 UNKNOWN, // 0BD1..0BD6 7648 TAMIL, // 0BD7 7649 UNKNOWN, // 0BD8..0BE5 7650 TAMIL, // 0BE6..0BFA 7651 UNKNOWN, // 0BFB..0BFF 7652 TELUGU, // 0C00..0C0C 7653 UNKNOWN, // 0C0D 7654 TELUGU, // 0C0E..0C10 7655 UNKNOWN, // 0C11 7656 TELUGU, // 0C12..0C28 7657 UNKNOWN, // 0C29 7658 TELUGU, // 0C2A..0C39 7659 UNKNOWN, // 0C3A..0C3B 7660 TELUGU, // 0C3C..0C44 7661 UNKNOWN, // 0C45 7662 TELUGU, // 0C46..0C48 7663 UNKNOWN, // 0C49 7664 TELUGU, // 0C4A..0C4D 7665 UNKNOWN, // 0C4E..0C54 7666 TELUGU, // 0C55..0C56 7667 UNKNOWN, // 0C57 7668 TELUGU, // 0C58..0C5A 7669 UNKNOWN, // 0C5B 7670 TELUGU, // 0C5C..0C5D 7671 UNKNOWN, // 0C5E..0C5F 7672 TELUGU, // 0C60..0C63 7673 UNKNOWN, // 0C64..0C65 7674 TELUGU, // 0C66..0C6F 7675 
UNKNOWN, // 0C70..0C76 7676 TELUGU, // 0C77..0C7F 7677 KANNADA, // 0C80..0C8C 7678 UNKNOWN, // 0C8D 7679 KANNADA, // 0C8E..0C90 7680 UNKNOWN, // 0C91 7681 KANNADA, // 0C92..0CA8 7682 UNKNOWN, // 0CA9 7683 KANNADA, // 0CAA..0CB3 7684 UNKNOWN, // 0CB4 7685 KANNADA, // 0CB5..0CB9 7686 UNKNOWN, // 0CBA..0CBB 7687 KANNADA, // 0CBC..0CC4 7688 UNKNOWN, // 0CC5 7689 KANNADA, // 0CC6..0CC8 7690 UNKNOWN, // 0CC9 7691 KANNADA, // 0CCA..0CCD 7692 UNKNOWN, // 0CCE..0CD4 7693 KANNADA, // 0CD5..0CD6 7694 UNKNOWN, // 0CD7..0CDB 7695 KANNADA, // 0CDC..0CDE 7696 UNKNOWN, // 0CDF 7697 KANNADA, // 0CE0..0CE3 7698 UNKNOWN, // 0CE4..0CE5 7699 KANNADA, // 0CE6..0CEF 7700 UNKNOWN, // 0CF0 7701 KANNADA, // 0CF1..0CF3 7702 UNKNOWN, // 0CF4..0CFF 7703 MALAYALAM, // 0D00..0D0C 7704 UNKNOWN, // 0D0D 7705 MALAYALAM, // 0D0E..0D10 7706 UNKNOWN, // 0D11 7707 MALAYALAM, // 0D12..0D44 7708 UNKNOWN, // 0D45 7709 MALAYALAM, // 0D46..0D48 7710 UNKNOWN, // 0D49 7711 MALAYALAM, // 0D4A..0D4F 7712 UNKNOWN, // 0D50..0D53 7713 MALAYALAM, // 0D54..0D63 7714 UNKNOWN, // 0D64..0D65 7715 MALAYALAM, // 0D66..0D7F 7716 UNKNOWN, // 0D80 7717 SINHALA, // 0D81..0D83 7718 UNKNOWN, // 0D84 7719 SINHALA, // 0D85..0D96 7720 UNKNOWN, // 0D97..0D99 7721 SINHALA, // 0D9A..0DB1 7722 UNKNOWN, // 0DB2 7723 SINHALA, // 0DB3..0DBB 7724 UNKNOWN, // 0DBC 7725 SINHALA, // 0DBD 7726 UNKNOWN, // 0DBE..0DBF 7727 SINHALA, // 0DC0..0DC6 7728 UNKNOWN, // 0DC7..0DC9 7729 SINHALA, // 0DCA 7730 UNKNOWN, // 0DCB..0DCE 7731 SINHALA, // 0DCF..0DD4 7732 UNKNOWN, // 0DD5 7733 SINHALA, // 0DD6 7734 UNKNOWN, // 0DD7 7735 SINHALA, // 0DD8..0DDF 7736 UNKNOWN, // 0DE0..0DE5 7737 SINHALA, // 0DE6..0DEF 7738 UNKNOWN, // 0DF0..0DF1 7739 SINHALA, // 0DF2..0DF4 7740 UNKNOWN, // 0DF5..0E00 7741 THAI, // 0E01..0E3A 7742 UNKNOWN, // 0E3B..0E3E 7743 COMMON, // 0E3F 7744 THAI, // 0E40..0E5B 7745 UNKNOWN, // 0E5C..0E80 7746 LAO, // 0E81..0E82 7747 UNKNOWN, // 0E83 7748 LAO, // 0E84 7749 UNKNOWN, // 0E85 7750 LAO, // 0E86..0E8A 7751 UNKNOWN, // 0E8B 7752 LAO, 
// 0E8C..0EA3 7753 UNKNOWN, // 0EA4 7754 LAO, // 0EA5 7755 UNKNOWN, // 0EA6 7756 LAO, // 0EA7..0EBD 7757 UNKNOWN, // 0EBE..0EBF 7758 LAO, // 0EC0..0EC4 7759 UNKNOWN, // 0EC5 7760 LAO, // 0EC6 7761 UNKNOWN, // 0EC7 7762 LAO, // 0EC8..0ECE 7763 UNKNOWN, // 0ECF 7764 LAO, // 0ED0..0ED9 7765 UNKNOWN, // 0EDA..0EDB 7766 LAO, // 0EDC..0EDF 7767 UNKNOWN, // 0EE0..0EFF 7768 TIBETAN, // 0F00..0F47 7769 UNKNOWN, // 0F48 7770 TIBETAN, // 0F49..0F6C 7771 UNKNOWN, // 0F6D..0F70 7772 TIBETAN, // 0F71..0F97 7773 UNKNOWN, // 0F98 7774 TIBETAN, // 0F99..0FBC 7775 UNKNOWN, // 0FBD 7776 TIBETAN, // 0FBE..0FCC 7777 UNKNOWN, // 0FCD 7778 TIBETAN, // 0FCE..0FD4 7779 COMMON, // 0FD5..0FD8 7780 TIBETAN, // 0FD9..0FDA 7781 UNKNOWN, // 0FDB..0FFF 7782 MYANMAR, // 1000..109F 7783 GEORGIAN, // 10A0..10C5 7784 UNKNOWN, // 10C6 7785 GEORGIAN, // 10C7 7786 UNKNOWN, // 10C8..10CC 7787 GEORGIAN, // 10CD 7788 UNKNOWN, // 10CE..10CF 7789 GEORGIAN, // 10D0..10FA 7790 COMMON, // 10FB 7791 GEORGIAN, // 10FC..10FF 7792 HANGUL, // 1100..11FF 7793 ETHIOPIC, // 1200..1248 7794 UNKNOWN, // 1249 7795 ETHIOPIC, // 124A..124D 7796 UNKNOWN, // 124E..124F 7797 ETHIOPIC, // 1250..1256 7798 UNKNOWN, // 1257 7799 ETHIOPIC, // 1258 7800 UNKNOWN, // 1259 7801 ETHIOPIC, // 125A..125D 7802 UNKNOWN, // 125E..125F 7803 ETHIOPIC, // 1260..1288 7804 UNKNOWN, // 1289 7805 ETHIOPIC, // 128A..128D 7806 UNKNOWN, // 128E..128F 7807 ETHIOPIC, // 1290..12B0 7808 UNKNOWN, // 12B1 7809 ETHIOPIC, // 12B2..12B5 7810 UNKNOWN, // 12B6..12B7 7811 ETHIOPIC, // 12B8..12BE 7812 UNKNOWN, // 12BF 7813 ETHIOPIC, // 12C0 7814 UNKNOWN, // 12C1 7815 ETHIOPIC, // 12C2..12C5 7816 UNKNOWN, // 12C6..12C7 7817 ETHIOPIC, // 12C8..12D6 7818 UNKNOWN, // 12D7 7819 ETHIOPIC, // 12D8..1310 7820 UNKNOWN, // 1311 7821 ETHIOPIC, // 1312..1315 7822 UNKNOWN, // 1316..1317 7823 ETHIOPIC, // 1318..135A 7824 UNKNOWN, // 135B..135C 7825 ETHIOPIC, // 135D..137C 7826 UNKNOWN, // 137D..137F 7827 ETHIOPIC, // 1380..1399 7828 UNKNOWN, // 139A..139F 7829 CHEROKEE, // 
13A0..13F5 7830 UNKNOWN, // 13F6..13F7 7831 CHEROKEE, // 13F8..13FD 7832 UNKNOWN, // 13FE..13FF 7833 CANADIAN_ABORIGINAL, // 1400..167F 7834 OGHAM, // 1680..169C 7835 UNKNOWN, // 169D..169F 7836 RUNIC, // 16A0..16EA 7837 COMMON, // 16EB..16ED 7838 RUNIC, // 16EE..16F8 7839 UNKNOWN, // 16F9..16FF 7840 TAGALOG, // 1700..1715 7841 UNKNOWN, // 1716..171E 7842 TAGALOG, // 171F 7843 HANUNOO, // 1720..1734 7844 COMMON, // 1735..1736 7845 UNKNOWN, // 1737..173F 7846 BUHID, // 1740..1753 7847 UNKNOWN, // 1754..175F 7848 TAGBANWA, // 1760..176C 7849 UNKNOWN, // 176D 7850 TAGBANWA, // 176E..1770 7851 UNKNOWN, // 1771 7852 TAGBANWA, // 1772..1773 7853 UNKNOWN, // 1774..177F 7854 KHMER, // 1780..17DD 7855 UNKNOWN, // 17DE..17DF 7856 KHMER, // 17E0..17E9 7857 UNKNOWN, // 17EA..17EF 7858 KHMER, // 17F0..17F9 7859 UNKNOWN, // 17FA..17FF 7860 MONGOLIAN, // 1800..1801 7861 COMMON, // 1802..1803 7862 MONGOLIAN, // 1804 7863 COMMON, // 1805 7864 MONGOLIAN, // 1806..1819 7865 UNKNOWN, // 181A..181F 7866 MONGOLIAN, // 1820..1878 7867 UNKNOWN, // 1879..187F 7868 MONGOLIAN, // 1880..18AA 7869 UNKNOWN, // 18AB..18AF 7870 CANADIAN_ABORIGINAL, // 18B0..18F5 7871 UNKNOWN, // 18F6..18FF 7872 LIMBU, // 1900..191E 7873 UNKNOWN, // 191F 7874 LIMBU, // 1920..192B 7875 UNKNOWN, // 192C..192F 7876 LIMBU, // 1930..193B 7877 UNKNOWN, // 193C..193F 7878 LIMBU, // 1940 7879 UNKNOWN, // 1941..1943 7880 LIMBU, // 1944..194F 7881 TAI_LE, // 1950..196D 7882 UNKNOWN, // 196E..196F 7883 TAI_LE, // 1970..1974 7884 UNKNOWN, // 1975..197F 7885 NEW_TAI_LUE, // 1980..19AB 7886 UNKNOWN, // 19AC..19AF 7887 NEW_TAI_LUE, // 19B0..19C9 7888 UNKNOWN, // 19CA..19CF 7889 NEW_TAI_LUE, // 19D0..19DA 7890 UNKNOWN, // 19DB..19DD 7891 NEW_TAI_LUE, // 19DE..19DF 7892 KHMER, // 19E0..19FF 7893 BUGINESE, // 1A00..1A1B 7894 UNKNOWN, // 1A1C..1A1D 7895 BUGINESE, // 1A1E..1A1F 7896 TAI_THAM, // 1A20..1A5E 7897 UNKNOWN, // 1A5F 7898 TAI_THAM, // 1A60..1A7C 7899 UNKNOWN, // 1A7D..1A7E 7900 TAI_THAM, // 1A7F..1A89 7901 UNKNOWN, // 
1A8A..1A8F 7902 TAI_THAM, // 1A90..1A99 7903 UNKNOWN, // 1A9A..1A9F 7904 TAI_THAM, // 1AA0..1AAD 7905 UNKNOWN, // 1AAE..1AAF 7906 INHERITED, // 1AB0..1ADD 7907 UNKNOWN, // 1ADE..1ADF 7908 INHERITED, // 1AE0..1AEB 7909 UNKNOWN, // 1AEC..1AFF 7910 BALINESE, // 1B00..1B4C 7911 UNKNOWN, // 1B4D 7912 BALINESE, // 1B4E..1B7F 7913 SUNDANESE, // 1B80..1BBF 7914 BATAK, // 1BC0..1BF3 7915 UNKNOWN, // 1BF4..1BFB 7916 BATAK, // 1BFC..1BFF 7917 LEPCHA, // 1C00..1C37 7918 UNKNOWN, // 1C38..1C3A 7919 LEPCHA, // 1C3B..1C49 7920 UNKNOWN, // 1C4A..1C4C 7921 LEPCHA, // 1C4D..1C4F 7922 OL_CHIKI, // 1C50..1C7F 7923 CYRILLIC, // 1C80..1C8A 7924 UNKNOWN, // 1C8B..1C8F 7925 GEORGIAN, // 1C90..1CBA 7926 UNKNOWN, // 1CBB..1CBC 7927 GEORGIAN, // 1CBD..1CBF 7928 SUNDANESE, // 1CC0..1CC7 7929 UNKNOWN, // 1CC8..1CCF 7930 INHERITED, // 1CD0..1CD2 7931 COMMON, // 1CD3 7932 INHERITED, // 1CD4..1CE0 7933 COMMON, // 1CE1 7934 INHERITED, // 1CE2..1CE8 7935 COMMON, // 1CE9..1CEC 7936 INHERITED, // 1CED 7937 COMMON, // 1CEE..1CF3 7938 INHERITED, // 1CF4 7939 COMMON, // 1CF5..1CF7 7940 INHERITED, // 1CF8..1CF9 7941 COMMON, // 1CFA 7942 UNKNOWN, // 1CFB..1CFF 7943 LATIN, // 1D00..1D25 7944 GREEK, // 1D26..1D2A 7945 CYRILLIC, // 1D2B 7946 LATIN, // 1D2C..1D5C 7947 GREEK, // 1D5D..1D61 7948 LATIN, // 1D62..1D65 7949 GREEK, // 1D66..1D6A 7950 LATIN, // 1D6B..1D77 7951 CYRILLIC, // 1D78 7952 LATIN, // 1D79..1DBE 7953 GREEK, // 1DBF 7954 INHERITED, // 1DC0..1DFF 7955 LATIN, // 1E00..1EFF 7956 GREEK, // 1F00..1F15 7957 UNKNOWN, // 1F16..1F17 7958 GREEK, // 1F18..1F1D 7959 UNKNOWN, // 1F1E..1F1F 7960 GREEK, // 1F20..1F45 7961 UNKNOWN, // 1F46..1F47 7962 GREEK, // 1F48..1F4D 7963 UNKNOWN, // 1F4E..1F4F 7964 GREEK, // 1F50..1F57 7965 UNKNOWN, // 1F58 7966 GREEK, // 1F59 7967 UNKNOWN, // 1F5A 7968 GREEK, // 1F5B 7969 UNKNOWN, // 1F5C 7970 GREEK, // 1F5D 7971 UNKNOWN, // 1F5E 7972 GREEK, // 1F5F..1F7D 7973 UNKNOWN, // 1F7E..1F7F 7974 GREEK, // 1F80..1FB4 7975 UNKNOWN, // 1FB5 7976 GREEK, // 1FB6..1FC4 7977 UNKNOWN, 
// 1FC5 7978 GREEK, // 1FC6..1FD3 7979 UNKNOWN, // 1FD4..1FD5 7980 GREEK, // 1FD6..1FDB 7981 UNKNOWN, // 1FDC 7982 GREEK, // 1FDD..1FEF 7983 UNKNOWN, // 1FF0..1FF1 7984 GREEK, // 1FF2..1FF4 7985 UNKNOWN, // 1FF5 7986 GREEK, // 1FF6..1FFE 7987 UNKNOWN, // 1FFF 7988 COMMON, // 2000..200B 7989 INHERITED, // 200C..200D 7990 COMMON, // 200E..2064 7991 UNKNOWN, // 2065 7992 COMMON, // 2066..2070 7993 LATIN, // 2071 7994 UNKNOWN, // 2072..2073 7995 COMMON, // 2074..207E 7996 LATIN, // 207F 7997 COMMON, // 2080..208E 7998 UNKNOWN, // 208F 7999 LATIN, // 2090..209C 8000 UNKNOWN, // 209D..209F 8001 COMMON, // 20A0..20C1 8002 UNKNOWN, // 20C2..20CF 8003 INHERITED, // 20D0..20F0 8004 UNKNOWN, // 20F1..20FF 8005 COMMON, // 2100..2125 8006 GREEK, // 2126 8007 COMMON, // 2127..2129 8008 LATIN, // 212A..212B 8009 COMMON, // 212C..2131 8010 LATIN, // 2132 8011 COMMON, // 2133..214D 8012 LATIN, // 214E 8013 COMMON, // 214F..215F 8014 LATIN, // 2160..2188 8015 COMMON, // 2189..218B 8016 UNKNOWN, // 218C..218F 8017 COMMON, // 2190..2429 8018 UNKNOWN, // 242A..243F 8019 COMMON, // 2440..244A 8020 UNKNOWN, // 244B..245F 8021 COMMON, // 2460..27FF 8022 BRAILLE, // 2800..28FF 8023 COMMON, // 2900..2B73 8024 UNKNOWN, // 2B74..2B75 8025 COMMON, // 2B76..2BFF 8026 GLAGOLITIC, // 2C00..2C5F 8027 LATIN, // 2C60..2C7F 8028 COPTIC, // 2C80..2CF3 8029 UNKNOWN, // 2CF4..2CF8 8030 COPTIC, // 2CF9..2CFF 8031 GEORGIAN, // 2D00..2D25 8032 UNKNOWN, // 2D26 8033 GEORGIAN, // 2D27 8034 UNKNOWN, // 2D28..2D2C 8035 GEORGIAN, // 2D2D 8036 UNKNOWN, // 2D2E..2D2F 8037 TIFINAGH, // 2D30..2D67 8038 UNKNOWN, // 2D68..2D6E 8039 TIFINAGH, // 2D6F..2D70 8040 UNKNOWN, // 2D71..2D7E 8041 TIFINAGH, // 2D7F 8042 ETHIOPIC, // 2D80..2D96 8043 UNKNOWN, // 2D97..2D9F 8044 ETHIOPIC, // 2DA0..2DA6 8045 UNKNOWN, // 2DA7 8046 ETHIOPIC, // 2DA8..2DAE 8047 UNKNOWN, // 2DAF 8048 ETHIOPIC, // 2DB0..2DB6 8049 UNKNOWN, // 2DB7 8050 ETHIOPIC, // 2DB8..2DBE 8051 UNKNOWN, // 2DBF 8052 ETHIOPIC, // 2DC0..2DC6 8053 UNKNOWN, // 2DC7 8054 
ETHIOPIC, // 2DC8..2DCE 8055 UNKNOWN, // 2DCF 8056 ETHIOPIC, // 2DD0..2DD6 8057 UNKNOWN, // 2DD7 8058 ETHIOPIC, // 2DD8..2DDE 8059 UNKNOWN, // 2DDF 8060 CYRILLIC, // 2DE0..2DFF 8061 COMMON, // 2E00..2E5D 8062 UNKNOWN, // 2E5E..2E7F 8063 HAN, // 2E80..2E99 8064 UNKNOWN, // 2E9A 8065 HAN, // 2E9B..2EF3 8066 UNKNOWN, // 2EF4..2EFF 8067 HAN, // 2F00..2FD5 8068 UNKNOWN, // 2FD6..2FEF 8069 COMMON, // 2FF0..3004 8070 HAN, // 3005 8071 COMMON, // 3006 8072 HAN, // 3007 8073 COMMON, // 3008..3020 8074 HAN, // 3021..3029 8075 INHERITED, // 302A..302D 8076 HANGUL, // 302E..302F 8077 COMMON, // 3030..3037 8078 HAN, // 3038..303B 8079 COMMON, // 303C..303F 8080 UNKNOWN, // 3040 8081 HIRAGANA, // 3041..3096 8082 UNKNOWN, // 3097..3098 8083 INHERITED, // 3099..309A 8084 COMMON, // 309B..309C 8085 HIRAGANA, // 309D..309F 8086 COMMON, // 30A0 8087 KATAKANA, // 30A1..30FA 8088 COMMON, // 30FB..30FC 8089 KATAKANA, // 30FD..30FF 8090 UNKNOWN, // 3100..3104 8091 BOPOMOFO, // 3105..312F 8092 UNKNOWN, // 3130 8093 HANGUL, // 3131..318E 8094 UNKNOWN, // 318F 8095 COMMON, // 3190..319F 8096 BOPOMOFO, // 31A0..31BF 8097 COMMON, // 31C0..31E5 8098 UNKNOWN, // 31E6..31EE 8099 COMMON, // 31EF 8100 KATAKANA, // 31F0..31FF 8101 HANGUL, // 3200..321E 8102 UNKNOWN, // 321F 8103 COMMON, // 3220..325F 8104 HANGUL, // 3260..327E 8105 COMMON, // 327F..32CF 8106 KATAKANA, // 32D0..32FE 8107 COMMON, // 32FF 8108 KATAKANA, // 3300..3357 8109 COMMON, // 3358..33FF 8110 HAN, // 3400..4DBF 8111 COMMON, // 4DC0..4DFF 8112 HAN, // 4E00..9FFF 8113 YI, // A000..A48C 8114 UNKNOWN, // A48D..A48F 8115 YI, // A490..A4C6 8116 UNKNOWN, // A4C7..A4CF 8117 LISU, // A4D0..A4FF 8118 VAI, // A500..A62B 8119 UNKNOWN, // A62C..A63F 8120 CYRILLIC, // A640..A69F 8121 BAMUM, // A6A0..A6F7 8122 UNKNOWN, // A6F8..A6FF 8123 COMMON, // A700..A721 8124 LATIN, // A722..A787 8125 COMMON, // A788..A78A 8126 LATIN, // A78B..A7DC 8127 UNKNOWN, // A7DD..A7F0 8128 LATIN, // A7F1..A7FF 8129 SYLOTI_NAGRI, // A800..A82C 8130 UNKNOWN, // 
A82D..A82F 8131 COMMON, // A830..A839 8132 UNKNOWN, // A83A..A83F 8133 PHAGS_PA, // A840..A877 8134 UNKNOWN, // A878..A87F 8135 SAURASHTRA, // A880..A8C5 8136 UNKNOWN, // A8C6..A8CD 8137 SAURASHTRA, // A8CE..A8D9 8138 UNKNOWN, // A8DA..A8DF 8139 DEVANAGARI, // A8E0..A8FF 8140 KAYAH_LI, // A900..A92D 8141 COMMON, // A92E 8142 KAYAH_LI, // A92F 8143 REJANG, // A930..A953 8144 UNKNOWN, // A954..A95E 8145 REJANG, // A95F 8146 HANGUL, // A960..A97C 8147 UNKNOWN, // A97D..A97F 8148 JAVANESE, // A980..A9CD 8149 UNKNOWN, // A9CE 8150 COMMON, // A9CF 8151 JAVANESE, // A9D0..A9D9 8152 UNKNOWN, // A9DA..A9DD 8153 JAVANESE, // A9DE..A9DF 8154 MYANMAR, // A9E0..A9FE 8155 UNKNOWN, // A9FF 8156 CHAM, // AA00..AA36 8157 UNKNOWN, // AA37..AA3F 8158 CHAM, // AA40..AA4D 8159 UNKNOWN, // AA4E..AA4F 8160 CHAM, // AA50..AA59 8161 UNKNOWN, // AA5A..AA5B 8162 CHAM, // AA5C..AA5F 8163 MYANMAR, // AA60..AA7F 8164 TAI_VIET, // AA80..AAC2 8165 UNKNOWN, // AAC3..AADA 8166 TAI_VIET, // AADB..AADF 8167 MEETEI_MAYEK, // AAE0..AAF6 8168 UNKNOWN, // AAF7..AB00 8169 ETHIOPIC, // AB01..AB06 8170 UNKNOWN, // AB07..AB08 8171 ETHIOPIC, // AB09..AB0E 8172 UNKNOWN, // AB0F..AB10 8173 ETHIOPIC, // AB11..AB16 8174 UNKNOWN, // AB17..AB1F 8175 ETHIOPIC, // AB20..AB26 8176 UNKNOWN, // AB27 8177 ETHIOPIC, // AB28..AB2E 8178 UNKNOWN, // AB2F 8179 LATIN, // AB30..AB5A 8180 COMMON, // AB5B 8181 LATIN, // AB5C..AB64 8182 GREEK, // AB65 8183 LATIN, // AB66..AB69 8184 COMMON, // AB6A..AB6B 8185 UNKNOWN, // AB6C..AB6F 8186 CHEROKEE, // AB70..ABBF 8187 MEETEI_MAYEK, // ABC0..ABED 8188 UNKNOWN, // ABEE..ABEF 8189 MEETEI_MAYEK, // ABF0..ABF9 8190 UNKNOWN, // ABFA..ABFF 8191 HANGUL, // AC00..D7A3 8192 UNKNOWN, // D7A4..D7AF 8193 HANGUL, // D7B0..D7C6 8194 UNKNOWN, // D7C7..D7CA 8195 HANGUL, // D7CB..D7FB 8196 UNKNOWN, // D7FC..F8FF 8197 HAN, // F900..FA6D 8198 UNKNOWN, // FA6E..FA6F 8199 HAN, // FA70..FAD9 8200 UNKNOWN, // FADA..FAFF 8201 LATIN, // FB00..FB06 8202 UNKNOWN, // FB07..FB12 8203 ARMENIAN, // FB13..FB17 8204 
UNKNOWN, // FB18..FB1C 8205 HEBREW, // FB1D..FB36 8206 UNKNOWN, // FB37 8207 HEBREW, // FB38..FB3C 8208 UNKNOWN, // FB3D 8209 HEBREW, // FB3E 8210 UNKNOWN, // FB3F 8211 HEBREW, // FB40..FB41 8212 UNKNOWN, // FB42 8213 HEBREW, // FB43..FB44 8214 UNKNOWN, // FB45 8215 HEBREW, // FB46..FB4F 8216 ARABIC, // FB50..FD3D 8217 COMMON, // FD3E..FD3F 8218 ARABIC, // FD40..FDCF 8219 UNKNOWN, // FDD0..FDEF 8220 ARABIC, // FDF0..FDFF 8221 INHERITED, // FE00..FE0F 8222 COMMON, // FE10..FE19 8223 UNKNOWN, // FE1A..FE1F 8224 INHERITED, // FE20..FE2D 8225 CYRILLIC, // FE2E..FE2F 8226 COMMON, // FE30..FE52 8227 UNKNOWN, // FE53 8228 COMMON, // FE54..FE66 8229 UNKNOWN, // FE67 8230 COMMON, // FE68..FE6B 8231 UNKNOWN, // FE6C..FE6F 8232 ARABIC, // FE70..FE74 8233 UNKNOWN, // FE75 8234 ARABIC, // FE76..FEFC 8235 UNKNOWN, // FEFD..FEFE 8236 COMMON, // FEFF 8237 UNKNOWN, // FF00 8238 COMMON, // FF01..FF20 8239 LATIN, // FF21..FF3A 8240 COMMON, // FF3B..FF40 8241 LATIN, // FF41..FF5A 8242 COMMON, // FF5B..FF65 8243 KATAKANA, // FF66..FF6F 8244 COMMON, // FF70 8245 KATAKANA, // FF71..FF9D 8246 COMMON, // FF9E..FF9F 8247 HANGUL, // FFA0..FFBE 8248 UNKNOWN, // FFBF..FFC1 8249 HANGUL, // FFC2..FFC7 8250 UNKNOWN, // FFC8..FFC9 8251 HANGUL, // FFCA..FFCF 8252 UNKNOWN, // FFD0..FFD1 8253 HANGUL, // FFD2..FFD7 8254 UNKNOWN, // FFD8..FFD9 8255 HANGUL, // FFDA..FFDC 8256 UNKNOWN, // FFDD..FFDF 8257 COMMON, // FFE0..FFE6 8258 UNKNOWN, // FFE7 8259 COMMON, // FFE8..FFEE 8260 UNKNOWN, // FFEF..FFF8 8261 COMMON, // FFF9..FFFD 8262 UNKNOWN, // FFFE..FFFF 8263 LINEAR_B, // 10000..1000B 8264 UNKNOWN, // 1000C 8265 LINEAR_B, // 1000D..10026 8266 UNKNOWN, // 10027 8267 LINEAR_B, // 10028..1003A 8268 UNKNOWN, // 1003B 8269 LINEAR_B, // 1003C..1003D 8270 UNKNOWN, // 1003E 8271 LINEAR_B, // 1003F..1004D 8272 UNKNOWN, // 1004E..1004F 8273 LINEAR_B, // 10050..1005D 8274 UNKNOWN, // 1005E..1007F 8275 LINEAR_B, // 10080..100FA 8276 UNKNOWN, // 100FB..100FF 8277 COMMON, // 10100..10102 8278 UNKNOWN, // 10103..10106 
8279 COMMON, // 10107..10133 8280 UNKNOWN, // 10134..10136 8281 COMMON, // 10137..1013F 8282 GREEK, // 10140..1018E 8283 UNKNOWN, // 1018F 8284 COMMON, // 10190..1019C 8285 UNKNOWN, // 1019D..1019F 8286 GREEK, // 101A0 8287 UNKNOWN, // 101A1..101CF 8288 COMMON, // 101D0..101FC 8289 INHERITED, // 101FD 8290 UNKNOWN, // 101FE..1027F 8291 LYCIAN, // 10280..1029C 8292 UNKNOWN, // 1029D..1029F 8293 CARIAN, // 102A0..102D0 8294 UNKNOWN, // 102D1..102DF 8295 INHERITED, // 102E0 8296 COMMON, // 102E1..102FB 8297 UNKNOWN, // 102FC..102FF 8298 OLD_ITALIC, // 10300..10323 8299 UNKNOWN, // 10324..1032C 8300 OLD_ITALIC, // 1032D..1032F 8301 GOTHIC, // 10330..1034A 8302 UNKNOWN, // 1034B..1034F 8303 OLD_PERMIC, // 10350..1037A 8304 UNKNOWN, // 1037B..1037F 8305 UGARITIC, // 10380..1039D 8306 UNKNOWN, // 1039E 8307 UGARITIC, // 1039F 8308 OLD_PERSIAN, // 103A0..103C3 8309 UNKNOWN, // 103C4..103C7 8310 OLD_PERSIAN, // 103C8..103D5 8311 UNKNOWN, // 103D6..103FF 8312 DESERET, // 10400..1044F 8313 SHAVIAN, // 10450..1047F 8314 OSMANYA, // 10480..1049D 8315 UNKNOWN, // 1049E..1049F 8316 OSMANYA, // 104A0..104A9 8317 UNKNOWN, // 104AA..104AF 8318 OSAGE, // 104B0..104D3 8319 UNKNOWN, // 104D4..104D7 8320 OSAGE, // 104D8..104FB 8321 UNKNOWN, // 104FC..104FF 8322 ELBASAN, // 10500..10527 8323 UNKNOWN, // 10528..1052F 8324 CAUCASIAN_ALBANIAN, // 10530..10563 8325 UNKNOWN, // 10564..1056E 8326 CAUCASIAN_ALBANIAN, // 1056F 8327 VITHKUQI, // 10570..1057A 8328 UNKNOWN, // 1057B 8329 VITHKUQI, // 1057C..1058A 8330 UNKNOWN, // 1058B 8331 VITHKUQI, // 1058C..10592 8332 UNKNOWN, // 10593 8333 VITHKUQI, // 10594..10595 8334 UNKNOWN, // 10596 8335 VITHKUQI, // 10597..105A1 8336 UNKNOWN, // 105A2 8337 VITHKUQI, // 105A3..105B1 8338 UNKNOWN, // 105B2 8339 VITHKUQI, // 105B3..105B9 8340 UNKNOWN, // 105BA 8341 VITHKUQI, // 105BB..105BC 8342 UNKNOWN, // 105BD..105BF 8343 TODHRI, // 105C0..105F3 8344 UNKNOWN, // 105F4..105FF 8345 LINEAR_A, // 10600..10736 8346 UNKNOWN, // 10737..1073F 8347 LINEAR_A, // 
10740..10755 8348 UNKNOWN, // 10756..1075F 8349 LINEAR_A, // 10760..10767 8350 UNKNOWN, // 10768..1077F 8351 LATIN, // 10780..10785 8352 UNKNOWN, // 10786 8353 LATIN, // 10787..107B0 8354 UNKNOWN, // 107B1 8355 LATIN, // 107B2..107BA 8356 UNKNOWN, // 107BB..107FF 8357 CYPRIOT, // 10800..10805 8358 UNKNOWN, // 10806..10807 8359 CYPRIOT, // 10808 8360 UNKNOWN, // 10809 8361 CYPRIOT, // 1080A..10835 8362 UNKNOWN, // 10836 8363 CYPRIOT, // 10837..10838 8364 UNKNOWN, // 10839..1083B 8365 CYPRIOT, // 1083C 8366 UNKNOWN, // 1083D..1083E 8367 CYPRIOT, // 1083F 8368 IMPERIAL_ARAMAIC, // 10840..10855 8369 UNKNOWN, // 10856 8370 IMPERIAL_ARAMAIC, // 10857..1085F 8371 PALMYRENE, // 10860..1087F 8372 NABATAEAN, // 10880..1089E 8373 UNKNOWN, // 1089F..108A6 8374 NABATAEAN, // 108A7..108AF 8375 UNKNOWN, // 108B0..108DF 8376 HATRAN, // 108E0..108F2 8377 UNKNOWN, // 108F3 8378 HATRAN, // 108F4..108F5 8379 UNKNOWN, // 108F6..108FA 8380 HATRAN, // 108FB..108FF 8381 PHOENICIAN, // 10900..1091B 8382 UNKNOWN, // 1091C..1091E 8383 PHOENICIAN, // 1091F 8384 LYDIAN, // 10920..10939 8385 UNKNOWN, // 1093A..1093E 8386 LYDIAN, // 1093F 8387 SIDETIC, // 10940..10959 8388 UNKNOWN, // 1095A..1097F 8389 MEROITIC_HIEROGLYPHS, // 10980..1099F 8390 MEROITIC_CURSIVE, // 109A0..109B7 8391 UNKNOWN, // 109B8..109BB 8392 MEROITIC_CURSIVE, // 109BC..109CF 8393 UNKNOWN, // 109D0..109D1 8394 MEROITIC_CURSIVE, // 109D2..109FF 8395 KHAROSHTHI, // 10A00..10A03 8396 UNKNOWN, // 10A04 8397 KHAROSHTHI, // 10A05..10A06 8398 UNKNOWN, // 10A07..10A0B 8399 KHAROSHTHI, // 10A0C..10A13 8400 UNKNOWN, // 10A14 8401 KHAROSHTHI, // 10A15..10A17 8402 UNKNOWN, // 10A18 8403 KHAROSHTHI, // 10A19..10A35 8404 UNKNOWN, // 10A36..10A37 8405 KHAROSHTHI, // 10A38..10A3A 8406 UNKNOWN, // 10A3B..10A3E 8407 KHAROSHTHI, // 10A3F..10A48 8408 UNKNOWN, // 10A49..10A4F 8409 KHAROSHTHI, // 10A50..10A58 8410 UNKNOWN, // 10A59..10A5F 8411 OLD_SOUTH_ARABIAN, // 10A60..10A7F 8412 OLD_NORTH_ARABIAN, // 10A80..10A9F 8413 UNKNOWN, // 10AA0..10ABF 
8414 MANICHAEAN, // 10AC0..10AE6 8415 UNKNOWN, // 10AE7..10AEA 8416 MANICHAEAN, // 10AEB..10AF6 8417 UNKNOWN, // 10AF7..10AFF 8418 AVESTAN, // 10B00..10B35 8419 UNKNOWN, // 10B36..10B38 8420 AVESTAN, // 10B39..10B3F 8421 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55 8422 UNKNOWN, // 10B56..10B57 8423 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F 8424 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72 8425 UNKNOWN, // 10B73..10B77 8426 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F 8427 PSALTER_PAHLAVI, // 10B80..10B91 8428 UNKNOWN, // 10B92..10B98 8429 PSALTER_PAHLAVI, // 10B99..10B9C 8430 UNKNOWN, // 10B9D..10BA8 8431 PSALTER_PAHLAVI, // 10BA9..10BAF 8432 UNKNOWN, // 10BB0..10BFF 8433 OLD_TURKIC, // 10C00..10C48 8434 UNKNOWN, // 10C49..10C7F 8435 OLD_HUNGARIAN, // 10C80..10CB2 8436 UNKNOWN, // 10CB3..10CBF 8437 OLD_HUNGARIAN, // 10CC0..10CF2 8438 UNKNOWN, // 10CF3..10CF9 8439 OLD_HUNGARIAN, // 10CFA..10CFF 8440 HANIFI_ROHINGYA, // 10D00..10D27 8441 UNKNOWN, // 10D28..10D2F 8442 HANIFI_ROHINGYA, // 10D30..10D39 8443 UNKNOWN, // 10D3A..10D3F 8444 GARAY, // 10D40..10D65 8445 UNKNOWN, // 10D66..10D68 8446 GARAY, // 10D69..10D85 8447 UNKNOWN, // 10D86..10D8D 8448 GARAY, // 10D8E..10D8F 8449 UNKNOWN, // 10D90..10E5F 8450 ARABIC, // 10E60..10E7E 8451 UNKNOWN, // 10E7F 8452 YEZIDI, // 10E80..10EA9 8453 UNKNOWN, // 10EAA 8454 YEZIDI, // 10EAB..10EAD 8455 UNKNOWN, // 10EAE..10EAF 8456 YEZIDI, // 10EB0..10EB1 8457 UNKNOWN, // 10EB2..10EC1 8458 ARABIC, // 10EC2..10EC7 8459 UNKNOWN, // 10EC8..10ECF 8460 ARABIC, // 10ED0..10ED8 8461 UNKNOWN, // 10ED9..10EF9 8462 ARABIC, // 10EFA..10EFF 8463 OLD_SOGDIAN, // 10F00..10F27 8464 UNKNOWN, // 10F28..10F2F 8465 SOGDIAN, // 10F30..10F59 8466 UNKNOWN, // 10F5A..10F6F 8467 OLD_UYGHUR, // 10F70..10F89 8468 UNKNOWN, // 10F8A..10FAF 8469 CHORASMIAN, // 10FB0..10FCB 8470 UNKNOWN, // 10FCC..10FDF 8471 ELYMAIC, // 10FE0..10FF6 8472 UNKNOWN, // 10FF7..10FFF 8473 BRAHMI, // 11000..1104D 8474 UNKNOWN, // 1104E..11051 8475 BRAHMI, // 11052..11075 8476 UNKNOWN, // 11076..1107E 
8477 BRAHMI, // 1107F 8478 KAITHI, // 11080..110C2 8479 UNKNOWN, // 110C3..110CC 8480 KAITHI, // 110CD 8481 UNKNOWN, // 110CE..110CF 8482 SORA_SOMPENG, // 110D0..110E8 8483 UNKNOWN, // 110E9..110EF 8484 SORA_SOMPENG, // 110F0..110F9 8485 UNKNOWN, // 110FA..110FF 8486 CHAKMA, // 11100..11134 8487 UNKNOWN, // 11135 8488 CHAKMA, // 11136..11147 8489 UNKNOWN, // 11148..1114F 8490 MAHAJANI, // 11150..11176 8491 UNKNOWN, // 11177..1117F 8492 SHARADA, // 11180..111DF 8493 UNKNOWN, // 111E0 8494 SINHALA, // 111E1..111F4 8495 UNKNOWN, // 111F5..111FF 8496 KHOJKI, // 11200..11211 8497 UNKNOWN, // 11212 8498 KHOJKI, // 11213..11241 8499 UNKNOWN, // 11242..1127F 8500 MULTANI, // 11280..11286 8501 UNKNOWN, // 11287 8502 MULTANI, // 11288 8503 UNKNOWN, // 11289 8504 MULTANI, // 1128A..1128D 8505 UNKNOWN, // 1128E 8506 MULTANI, // 1128F..1129D 8507 UNKNOWN, // 1129E 8508 MULTANI, // 1129F..112A9 8509 UNKNOWN, // 112AA..112AF 8510 KHUDAWADI, // 112B0..112EA 8511 UNKNOWN, // 112EB..112EF 8512 KHUDAWADI, // 112F0..112F9 8513 UNKNOWN, // 112FA..112FF 8514 GRANTHA, // 11300..11303 8515 UNKNOWN, // 11304 8516 GRANTHA, // 11305..1130C 8517 UNKNOWN, // 1130D..1130E 8518 GRANTHA, // 1130F..11310 8519 UNKNOWN, // 11311..11312 8520 GRANTHA, // 11313..11328 8521 UNKNOWN, // 11329 8522 GRANTHA, // 1132A..11330 8523 UNKNOWN, // 11331 8524 GRANTHA, // 11332..11333 8525 UNKNOWN, // 11334 8526 GRANTHA, // 11335..11339 8527 UNKNOWN, // 1133A 8528 INHERITED, // 1133B 8529 GRANTHA, // 1133C..11344 8530 UNKNOWN, // 11345..11346 8531 GRANTHA, // 11347..11348 8532 UNKNOWN, // 11349..1134A 8533 GRANTHA, // 1134B..1134D 8534 UNKNOWN, // 1134E..1134F 8535 GRANTHA, // 11350 8536 UNKNOWN, // 11351..11356 8537 GRANTHA, // 11357 8538 UNKNOWN, // 11358..1135C 8539 GRANTHA, // 1135D..11363 8540 UNKNOWN, // 11364..11365 8541 GRANTHA, // 11366..1136C 8542 UNKNOWN, // 1136D..1136F 8543 GRANTHA, // 11370..11374 8544 UNKNOWN, // 11375..1137F 8545 TULU_TIGALARI, // 11380..11389 8546 UNKNOWN, // 1138A 8547 
TULU_TIGALARI, // 1138B 8548 UNKNOWN, // 1138C..1138D 8549 TULU_TIGALARI, // 1138E 8550 UNKNOWN, // 1138F 8551 TULU_TIGALARI, // 11390..113B5 8552 UNKNOWN, // 113B6 8553 TULU_TIGALARI, // 113B7..113C0 8554 UNKNOWN, // 113C1 8555 TULU_TIGALARI, // 113C2 8556 UNKNOWN, // 113C3..113C4 8557 TULU_TIGALARI, // 113C5 8558 UNKNOWN, // 113C6 8559 TULU_TIGALARI, // 113C7..113CA 8560 UNKNOWN, // 113CB 8561 TULU_TIGALARI, // 113CC..113D5 8562 UNKNOWN, // 113D6 8563 TULU_TIGALARI, // 113D7..113D8 8564 UNKNOWN, // 113D9..113E0 8565 TULU_TIGALARI, // 113E1..113E2 8566 UNKNOWN, // 113E3..113FF 8567 NEWA, // 11400..1145B 8568 UNKNOWN, // 1145C 8569 NEWA, // 1145D..11461 8570 UNKNOWN, // 11462..1147F 8571 TIRHUTA, // 11480..114C7 8572 UNKNOWN, // 114C8..114CF 8573 TIRHUTA, // 114D0..114D9 8574 UNKNOWN, // 114DA..1157F 8575 SIDDHAM, // 11580..115B5 8576 UNKNOWN, // 115B6..115B7 8577 SIDDHAM, // 115B8..115DD 8578 UNKNOWN, // 115DE..115FF 8579 MODI, // 11600..11644 8580 UNKNOWN, // 11645..1164F 8581 MODI, // 11650..11659 8582 UNKNOWN, // 1165A..1165F 8583 MONGOLIAN, // 11660..1166C 8584 UNKNOWN, // 1166D..1167F 8585 TAKRI, // 11680..116B9 8586 UNKNOWN, // 116BA..116BF 8587 TAKRI, // 116C0..116C9 8588 UNKNOWN, // 116CA..116CF 8589 MYANMAR, // 116D0..116E3 8590 UNKNOWN, // 116E4..116FF 8591 AHOM, // 11700..1171A 8592 UNKNOWN, // 1171B..1171C 8593 AHOM, // 1171D..1172B 8594 UNKNOWN, // 1172C..1172F 8595 AHOM, // 11730..11746 8596 UNKNOWN, // 11747..117FF 8597 DOGRA, // 11800..1183B 8598 UNKNOWN, // 1183C..1189F 8599 WARANG_CITI, // 118A0..118F2 8600 UNKNOWN, // 118F3..118FE 8601 WARANG_CITI, // 118FF 8602 DIVES_AKURU, // 11900..11906 8603 UNKNOWN, // 11907..11908 8604 DIVES_AKURU, // 11909 8605 UNKNOWN, // 1190A..1190B 8606 DIVES_AKURU, // 1190C..11913 8607 UNKNOWN, // 11914 8608 DIVES_AKURU, // 11915..11916 8609 UNKNOWN, // 11917 8610 DIVES_AKURU, // 11918..11935 8611 UNKNOWN, // 11936 8612 DIVES_AKURU, // 11937..11938 8613 UNKNOWN, // 11939..1193A 8614 DIVES_AKURU, // 1193B..11946 8615 
UNKNOWN, // 11947..1194F 8616 DIVES_AKURU, // 11950..11959 8617 UNKNOWN, // 1195A..1199F 8618 NANDINAGARI, // 119A0..119A7 8619 UNKNOWN, // 119A8..119A9 8620 NANDINAGARI, // 119AA..119D7 8621 UNKNOWN, // 119D8..119D9 8622 NANDINAGARI, // 119DA..119E4 8623 UNKNOWN, // 119E5..119FF 8624 ZANABAZAR_SQUARE, // 11A00..11A47 8625 UNKNOWN, // 11A48..11A4F 8626 SOYOMBO, // 11A50..11AA2 8627 UNKNOWN, // 11AA3..11AAF 8628 CANADIAN_ABORIGINAL, // 11AB0..11ABF 8629 PAU_CIN_HAU, // 11AC0..11AF8 8630 UNKNOWN, // 11AF9..11AFF 8631 DEVANAGARI, // 11B00..11B09 8632 UNKNOWN, // 11B0A..11B5F 8633 SHARADA, // 11B60..11B67 8634 UNKNOWN, // 11B68..11BBF 8635 SUNUWAR, // 11BC0..11BE1 8636 UNKNOWN, // 11BE2..11BEF 8637 SUNUWAR, // 11BF0..11BF9 8638 UNKNOWN, // 11BFA..11BFF 8639 BHAIKSUKI, // 11C00..11C08 8640 UNKNOWN, // 11C09 8641 BHAIKSUKI, // 11C0A..11C36 8642 UNKNOWN, // 11C37 8643 BHAIKSUKI, // 11C38..11C45 8644 UNKNOWN, // 11C46..11C4F 8645 BHAIKSUKI, // 11C50..11C6C 8646 UNKNOWN, // 11C6D..11C6F 8647 MARCHEN, // 11C70..11C8F 8648 UNKNOWN, // 11C90..11C91 8649 MARCHEN, // 11C92..11CA7 8650 UNKNOWN, // 11CA8 8651 MARCHEN, // 11CA9..11CB6 8652 UNKNOWN, // 11CB7..11CFF 8653 MASARAM_GONDI, // 11D00..11D06 8654 UNKNOWN, // 11D07 8655 MASARAM_GONDI, // 11D08..11D09 8656 UNKNOWN, // 11D0A 8657 MASARAM_GONDI, // 11D0B..11D36 8658 UNKNOWN, // 11D37..11D39 8659 MASARAM_GONDI, // 11D3A 8660 UNKNOWN, // 11D3B 8661 MASARAM_GONDI, // 11D3C..11D3D 8662 UNKNOWN, // 11D3E 8663 MASARAM_GONDI, // 11D3F..11D47 8664 UNKNOWN, // 11D48..11D4F 8665 MASARAM_GONDI, // 11D50..11D59 8666 UNKNOWN, // 11D5A..11D5F 8667 GUNJALA_GONDI, // 11D60..11D65 8668 UNKNOWN, // 11D66 8669 GUNJALA_GONDI, // 11D67..11D68 8670 UNKNOWN, // 11D69 8671 GUNJALA_GONDI, // 11D6A..11D8E 8672 UNKNOWN, // 11D8F 8673 GUNJALA_GONDI, // 11D90..11D91 8674 UNKNOWN, // 11D92 8675 GUNJALA_GONDI, // 11D93..11D98 8676 UNKNOWN, // 11D99..11D9F 8677 GUNJALA_GONDI, // 11DA0..11DA9 8678 UNKNOWN, // 11DAA..11DAF 8679 TOLONG_SIKI, // 11DB0..11DDB 8680 
UNKNOWN, // 11DDC..11DDF 8681 TOLONG_SIKI, // 11DE0..11DE9 8682 UNKNOWN, // 11DEA..11EDF 8683 MAKASAR, // 11EE0..11EF8 8684 UNKNOWN, // 11EF9..11EFF 8685 KAWI, // 11F00..11F10 8686 UNKNOWN, // 11F11 8687 KAWI, // 11F12..11F3A 8688 UNKNOWN, // 11F3B..11F3D 8689 KAWI, // 11F3E..11F5A 8690 UNKNOWN, // 11F5B..11FAF 8691 LISU, // 11FB0 8692 UNKNOWN, // 11FB1..11FBF 8693 TAMIL, // 11FC0..11FF1 8694 UNKNOWN, // 11FF2..11FFE 8695 TAMIL, // 11FFF 8696 CUNEIFORM, // 12000..12399 8697 UNKNOWN, // 1239A..123FF 8698 CUNEIFORM, // 12400..1246E 8699 UNKNOWN, // 1246F 8700 CUNEIFORM, // 12470..12474 8701 UNKNOWN, // 12475..1247F 8702 CUNEIFORM, // 12480..12543 8703 UNKNOWN, // 12544..12F8F 8704 CYPRO_MINOAN, // 12F90..12FF2 8705 UNKNOWN, // 12FF3..12FFF 8706 EGYPTIAN_HIEROGLYPHS, // 13000..13455 8707 UNKNOWN, // 13456..1345F 8708 EGYPTIAN_HIEROGLYPHS, // 13460..143FA 8709 UNKNOWN, // 143FB..143FF 8710 ANATOLIAN_HIEROGLYPHS, // 14400..14646 8711 UNKNOWN, // 14647..160FF 8712 GURUNG_KHEMA, // 16100..16139 8713 UNKNOWN, // 1613A..167FF 8714 BAMUM, // 16800..16A38 8715 UNKNOWN, // 16A39..16A3F 8716 MRO, // 16A40..16A5E 8717 UNKNOWN, // 16A5F 8718 MRO, // 16A60..16A69 8719 UNKNOWN, // 16A6A..16A6D 8720 MRO, // 16A6E..16A6F 8721 TANGSA, // 16A70..16ABE 8722 UNKNOWN, // 16ABF 8723 TANGSA, // 16AC0..16AC9 8724 UNKNOWN, // 16ACA..16ACF 8725 BASSA_VAH, // 16AD0..16AED 8726 UNKNOWN, // 16AEE..16AEF 8727 BASSA_VAH, // 16AF0..16AF5 8728 UNKNOWN, // 16AF6..16AFF 8729 PAHAWH_HMONG, // 16B00..16B45 8730 UNKNOWN, // 16B46..16B4F 8731 PAHAWH_HMONG, // 16B50..16B59 8732 UNKNOWN, // 16B5A 8733 PAHAWH_HMONG, // 16B5B..16B61 8734 UNKNOWN, // 16B62 8735 PAHAWH_HMONG, // 16B63..16B77 8736 UNKNOWN, // 16B78..16B7C 8737 PAHAWH_HMONG, // 16B7D..16B8F 8738 UNKNOWN, // 16B90..16D3F 8739 KIRAT_RAI, // 16D40..16D79 8740 UNKNOWN, // 16D7A..16E3F 8741 MEDEFAIDRIN, // 16E40..16E9A 8742 UNKNOWN, // 16E9B..16E9F 8743 BERIA_ERFE, // 16EA0..16EB8 8744 UNKNOWN, // 16EB9..16EBA 8745 BERIA_ERFE, // 16EBB..16ED3 8746 
UNKNOWN, // 16ED4..16EFF 8747 MIAO, // 16F00..16F4A 8748 UNKNOWN, // 16F4B..16F4E 8749 MIAO, // 16F4F..16F87 8750 UNKNOWN, // 16F88..16F8E 8751 MIAO, // 16F8F..16F9F 8752 UNKNOWN, // 16FA0..16FDF 8753 TANGUT, // 16FE0 8754 NUSHU, // 16FE1 8755 HAN, // 16FE2..16FE3 8756 KHITAN_SMALL_SCRIPT, // 16FE4 8757 UNKNOWN, // 16FE5..16FEF 8758 HAN, // 16FF0..16FF6 8759 UNKNOWN, // 16FF7..16FFF 8760 TANGUT, // 17000..18AFF 8761 KHITAN_SMALL_SCRIPT, // 18B00..18CD5 8762 UNKNOWN, // 18CD6..18CFE 8763 KHITAN_SMALL_SCRIPT, // 18CFF 8764 TANGUT, // 18D00..18D1E 8765 UNKNOWN, // 18D1F..18D7F 8766 TANGUT, // 18D80..18DF2 8767 UNKNOWN, // 18DF3..1AFEF 8768 KATAKANA, // 1AFF0..1AFF3 8769 UNKNOWN, // 1AFF4 8770 KATAKANA, // 1AFF5..1AFFB 8771 UNKNOWN, // 1AFFC 8772 KATAKANA, // 1AFFD..1AFFE 8773 UNKNOWN, // 1AFFF 8774 KATAKANA, // 1B000 8775 HIRAGANA, // 1B001..1B11F 8776 KATAKANA, // 1B120..1B122 8777 UNKNOWN, // 1B123..1B131 8778 HIRAGANA, // 1B132 8779 UNKNOWN, // 1B133..1B14F 8780 HIRAGANA, // 1B150..1B152 8781 UNKNOWN, // 1B153..1B154 8782 KATAKANA, // 1B155 8783 UNKNOWN, // 1B156..1B163 8784 KATAKANA, // 1B164..1B167 8785 UNKNOWN, // 1B168..1B16F 8786 NUSHU, // 1B170..1B2FB 8787 UNKNOWN, // 1B2FC..1BBFF 8788 DUPLOYAN, // 1BC00..1BC6A 8789 UNKNOWN, // 1BC6B..1BC6F 8790 DUPLOYAN, // 1BC70..1BC7C 8791 UNKNOWN, // 1BC7D..1BC7F 8792 DUPLOYAN, // 1BC80..1BC88 8793 UNKNOWN, // 1BC89..1BC8F 8794 DUPLOYAN, // 1BC90..1BC99 8795 UNKNOWN, // 1BC9A..1BC9B 8796 DUPLOYAN, // 1BC9C..1BC9F 8797 COMMON, // 1BCA0..1BCA3 8798 UNKNOWN, // 1BCA4..1CBFF 8799 COMMON, // 1CC00..1CCFC 8800 UNKNOWN, // 1CCFD..1CCFF 8801 COMMON, // 1CD00..1CEB3 8802 UNKNOWN, // 1CEB4..1CEB9 8803 COMMON, // 1CEBA..1CED0 8804 UNKNOWN, // 1CED1..1CEDF 8805 COMMON, // 1CEE0..1CEF0 8806 UNKNOWN, // 1CEF1..1CEFF 8807 INHERITED, // 1CF00..1CF2D 8808 UNKNOWN, // 1CF2E..1CF2F 8809 INHERITED, // 1CF30..1CF46 8810 UNKNOWN, // 1CF47..1CF4F 8811 COMMON, // 1CF50..1CFC3 8812 UNKNOWN, // 1CFC4..1CFFF 8813 COMMON, // 1D000..1D0F5 8814 
UNKNOWN, // 1D0F6..1D0FF 8815 COMMON, // 1D100..1D126 8816 UNKNOWN, // 1D127..1D128 8817 COMMON, // 1D129..1D166 8818 INHERITED, // 1D167..1D169 8819 COMMON, // 1D16A..1D17A 8820 INHERITED, // 1D17B..1D182 8821 COMMON, // 1D183..1D184 8822 INHERITED, // 1D185..1D18B 8823 COMMON, // 1D18C..1D1A9 8824 INHERITED, // 1D1AA..1D1AD 8825 COMMON, // 1D1AE..1D1EA 8826 UNKNOWN, // 1D1EB..1D1FF 8827 GREEK, // 1D200..1D245 8828 UNKNOWN, // 1D246..1D2BF 8829 COMMON, // 1D2C0..1D2D3 8830 UNKNOWN, // 1D2D4..1D2DF 8831 COMMON, // 1D2E0..1D2F3 8832 UNKNOWN, // 1D2F4..1D2FF 8833 COMMON, // 1D300..1D356 8834 UNKNOWN, // 1D357..1D35F 8835 COMMON, // 1D360..1D378 8836 UNKNOWN, // 1D379..1D3FF 8837 COMMON, // 1D400..1D454 8838 UNKNOWN, // 1D455 8839 COMMON, // 1D456..1D49C 8840 UNKNOWN, // 1D49D 8841 COMMON, // 1D49E..1D49F 8842 UNKNOWN, // 1D4A0..1D4A1 8843 COMMON, // 1D4A2 8844 UNKNOWN, // 1D4A3..1D4A4 8845 COMMON, // 1D4A5..1D4A6 8846 UNKNOWN, // 1D4A7..1D4A8 8847 COMMON, // 1D4A9..1D4AC 8848 UNKNOWN, // 1D4AD 8849 COMMON, // 1D4AE..1D4B9 8850 UNKNOWN, // 1D4BA 8851 COMMON, // 1D4BB 8852 UNKNOWN, // 1D4BC 8853 COMMON, // 1D4BD..1D4C3 8854 UNKNOWN, // 1D4C4 8855 COMMON, // 1D4C5..1D505 8856 UNKNOWN, // 1D506 8857 COMMON, // 1D507..1D50A 8858 UNKNOWN, // 1D50B..1D50C 8859 COMMON, // 1D50D..1D514 8860 UNKNOWN, // 1D515 8861 COMMON, // 1D516..1D51C 8862 UNKNOWN, // 1D51D 8863 COMMON, // 1D51E..1D539 8864 UNKNOWN, // 1D53A 8865 COMMON, // 1D53B..1D53E 8866 UNKNOWN, // 1D53F 8867 COMMON, // 1D540..1D544 8868 UNKNOWN, // 1D545 8869 COMMON, // 1D546 8870 UNKNOWN, // 1D547..1D549 8871 COMMON, // 1D54A..1D550 8872 UNKNOWN, // 1D551 8873 COMMON, // 1D552..1D6A5 8874 UNKNOWN, // 1D6A6..1D6A7 8875 COMMON, // 1D6A8..1D7CB 8876 UNKNOWN, // 1D7CC..1D7CD 8877 COMMON, // 1D7CE..1D7FF 8878 SIGNWRITING, // 1D800..1DA8B 8879 UNKNOWN, // 1DA8C..1DA9A 8880 SIGNWRITING, // 1DA9B..1DA9F 8881 UNKNOWN, // 1DAA0 8882 SIGNWRITING, // 1DAA1..1DAAF 8883 UNKNOWN, // 1DAB0..1DEFF 8884 LATIN, // 1DF00..1DF1E 8885 
UNKNOWN, // 1DF1F..1DF24 8886 LATIN, // 1DF25..1DF2A 8887 UNKNOWN, // 1DF2B..1DFFF 8888 GLAGOLITIC, // 1E000..1E006 8889 UNKNOWN, // 1E007 8890 GLAGOLITIC, // 1E008..1E018 8891 UNKNOWN, // 1E019..1E01A 8892 GLAGOLITIC, // 1E01B..1E021 8893 UNKNOWN, // 1E022 8894 GLAGOLITIC, // 1E023..1E024 8895 UNKNOWN, // 1E025 8896 GLAGOLITIC, // 1E026..1E02A 8897 UNKNOWN, // 1E02B..1E02F 8898 CYRILLIC, // 1E030..1E06D 8899 UNKNOWN, // 1E06E..1E08E 8900 CYRILLIC, // 1E08F 8901 UNKNOWN, // 1E090..1E0FF 8902 NYIAKENG_PUACHUE_HMONG, // 1E100..1E12C 8903 UNKNOWN, // 1E12D..1E12F 8904 NYIAKENG_PUACHUE_HMONG, // 1E130..1E13D 8905 UNKNOWN, // 1E13E..1E13F 8906 NYIAKENG_PUACHUE_HMONG, // 1E140..1E149 8907 UNKNOWN, // 1E14A..1E14D 8908 NYIAKENG_PUACHUE_HMONG, // 1E14E..1E14F 8909 UNKNOWN, // 1E150..1E28F 8910 TOTO, // 1E290..1E2AE 8911 UNKNOWN, // 1E2AF..1E2BF 8912 WANCHO, // 1E2C0..1E2F9 8913 UNKNOWN, // 1E2FA..1E2FE 8914 WANCHO, // 1E2FF 8915 UNKNOWN, // 1E300..1E4CF 8916 NAG_MUNDARI, // 1E4D0..1E4F9 8917 UNKNOWN, // 1E4FA..1E5CF 8918 OL_ONAL, // 1E5D0..1E5FA 8919 UNKNOWN, // 1E5FB..1E5FE 8920 OL_ONAL, // 1E5FF 8921 UNKNOWN, // 1E600..1E6BF 8922 TAI_YO, // 1E6C0..1E6DE 8923 UNKNOWN, // 1E6DF 8924 TAI_YO, // 1E6E0..1E6F5 8925 UNKNOWN, // 1E6F6..1E6FD 8926 TAI_YO, // 1E6FE..1E6FF 8927 UNKNOWN, // 1E700..1E7DF 8928 ETHIOPIC, // 1E7E0..1E7E6 8929 UNKNOWN, // 1E7E7 8930 ETHIOPIC, // 1E7E8..1E7EB 8931 UNKNOWN, // 1E7EC 8932 ETHIOPIC, // 1E7ED..1E7EE 8933 UNKNOWN, // 1E7EF 8934 ETHIOPIC, // 1E7F0..1E7FE 8935 UNKNOWN, // 1E7FF 8936 MENDE_KIKAKUI, // 1E800..1E8C4 8937 UNKNOWN, // 1E8C5..1E8C6 8938 MENDE_KIKAKUI, // 1E8C7..1E8D6 8939 UNKNOWN, // 1E8D7..1E8FF 8940 ADLAM, // 1E900..1E94B 8941 UNKNOWN, // 1E94C..1E94F 8942 ADLAM, // 1E950..1E959 8943 UNKNOWN, // 1E95A..1E95D 8944 ADLAM, // 1E95E..1E95F 8945 UNKNOWN, // 1E960..1EC70 8946 COMMON, // 1EC71..1ECB4 8947 UNKNOWN, // 1ECB5..1ED00 8948 COMMON, // 1ED01..1ED3D 8949 UNKNOWN, // 1ED3E..1EDFF 8950 ARABIC, // 1EE00..1EE03 8951 UNKNOWN, // 1EE04 
8952 ARABIC, // 1EE05..1EE1F 8953 UNKNOWN, // 1EE20 8954 ARABIC, // 1EE21..1EE22 8955 UNKNOWN, // 1EE23 8956 ARABIC, // 1EE24 8957 UNKNOWN, // 1EE25..1EE26 8958 ARABIC, // 1EE27 8959 UNKNOWN, // 1EE28 8960 ARABIC, // 1EE29..1EE32 8961 UNKNOWN, // 1EE33 8962 ARABIC, // 1EE34..1EE37 8963 UNKNOWN, // 1EE38 8964 ARABIC, // 1EE39 8965 UNKNOWN, // 1EE3A 8966 ARABIC, // 1EE3B 8967 UNKNOWN, // 1EE3C..1EE41 8968 ARABIC, // 1EE42 8969 UNKNOWN, // 1EE43..1EE46 8970 ARABIC, // 1EE47 8971 UNKNOWN, // 1EE48 8972 ARABIC, // 1EE49 8973 UNKNOWN, // 1EE4A 8974 ARABIC, // 1EE4B 8975 UNKNOWN, // 1EE4C 8976 ARABIC, // 1EE4D..1EE4F 8977 UNKNOWN, // 1EE50 8978 ARABIC, // 1EE51..1EE52 8979 UNKNOWN, // 1EE53 8980 ARABIC, // 1EE54 8981 UNKNOWN, // 1EE55..1EE56 8982 ARABIC, // 1EE57 8983 UNKNOWN, // 1EE58 8984 ARABIC, // 1EE59 8985 UNKNOWN, // 1EE5A 8986 ARABIC, // 1EE5B 8987 UNKNOWN, // 1EE5C 8988 ARABIC, // 1EE5D 8989 UNKNOWN, // 1EE5E 8990 ARABIC, // 1EE5F 8991 UNKNOWN, // 1EE60 8992 ARABIC, // 1EE61..1EE62 8993 UNKNOWN, // 1EE63 8994 ARABIC, // 1EE64 8995 UNKNOWN, // 1EE65..1EE66 8996 ARABIC, // 1EE67..1EE6A 8997 UNKNOWN, // 1EE6B 8998 ARABIC, // 1EE6C..1EE72 8999 UNKNOWN, // 1EE73 9000 ARABIC, // 1EE74..1EE77 9001 UNKNOWN, // 1EE78 9002 ARABIC, // 1EE79..1EE7C 9003 UNKNOWN, // 1EE7D 9004 ARABIC, // 1EE7E 9005 UNKNOWN, // 1EE7F 9006 ARABIC, // 1EE80..1EE89 9007 UNKNOWN, // 1EE8A 9008 ARABIC, // 1EE8B..1EE9B 9009 UNKNOWN, // 1EE9C..1EEA0 9010 ARABIC, // 1EEA1..1EEA3 9011 UNKNOWN, // 1EEA4 9012 ARABIC, // 1EEA5..1EEA9 9013 UNKNOWN, // 1EEAA 9014 ARABIC, // 1EEAB..1EEBB 9015 UNKNOWN, // 1EEBC..1EEEF 9016 ARABIC, // 1EEF0..1EEF1 9017 UNKNOWN, // 1EEF2..1EFFF 9018 COMMON, // 1F000..1F02B 9019 UNKNOWN, // 1F02C..1F02F 9020 COMMON, // 1F030..1F093 9021 UNKNOWN, // 1F094..1F09F 9022 COMMON, // 1F0A0..1F0AE 9023 UNKNOWN, // 1F0AF..1F0B0 9024 COMMON, // 1F0B1..1F0BF 9025 UNKNOWN, // 1F0C0 9026 COMMON, // 1F0C1..1F0CF 9027 UNKNOWN, // 1F0D0 9028 COMMON, // 1F0D1..1F0F5 9029 UNKNOWN, // 1F0F6..1F0FF 
9030 COMMON, // 1F100..1F1AD 9031 UNKNOWN, // 1F1AE..1F1E5 9032 COMMON, // 1F1E6..1F1FF 9033 HIRAGANA, // 1F200 9034 COMMON, // 1F201..1F202 9035 UNKNOWN, // 1F203..1F20F 9036 COMMON, // 1F210..1F23B 9037 UNKNOWN, // 1F23C..1F23F 9038 COMMON, // 1F240..1F248 9039 UNKNOWN, // 1F249..1F24F 9040 COMMON, // 1F250..1F251 9041 UNKNOWN, // 1F252..1F25F 9042 COMMON, // 1F260..1F265 9043 UNKNOWN, // 1F266..1F2FF 9044 COMMON, // 1F300..1F6D8 9045 UNKNOWN, // 1F6D9..1F6DB 9046 COMMON, // 1F6DC..1F6EC 9047 UNKNOWN, // 1F6ED..1F6EF 9048 COMMON, // 1F6F0..1F6FC 9049 UNKNOWN, // 1F6FD..1F6FF 9050 COMMON, // 1F700..1F7D9 9051 UNKNOWN, // 1F7DA..1F7DF 9052 COMMON, // 1F7E0..1F7EB 9053 UNKNOWN, // 1F7EC..1F7EF 9054 COMMON, // 1F7F0 9055 UNKNOWN, // 1F7F1..1F7FF 9056 COMMON, // 1F800..1F80B 9057 UNKNOWN, // 1F80C..1F80F 9058 COMMON, // 1F810..1F847 9059 UNKNOWN, // 1F848..1F84F 9060 COMMON, // 1F850..1F859 9061 UNKNOWN, // 1F85A..1F85F 9062 COMMON, // 1F860..1F887 9063 UNKNOWN, // 1F888..1F88F 9064 COMMON, // 1F890..1F8AD 9065 UNKNOWN, // 1F8AE..1F8AF 9066 COMMON, // 1F8B0..1F8BB 9067 UNKNOWN, // 1F8BC..1F8BF 9068 COMMON, // 1F8C0..1F8C1 9069 UNKNOWN, // 1F8C2..1F8CF 9070 COMMON, // 1F8D0..1F8D8 9071 UNKNOWN, // 1F8D9..1F8FF 9072 COMMON, // 1F900..1FA57 9073 UNKNOWN, // 1FA58..1FA5F 9074 COMMON, // 1FA60..1FA6D 9075 UNKNOWN, // 1FA6E..1FA6F 9076 COMMON, // 1FA70..1FA7C 9077 UNKNOWN, // 1FA7D..1FA7F 9078 COMMON, // 1FA80..1FA8A 9079 UNKNOWN, // 1FA8B..1FA8D 9080 COMMON, // 1FA8E..1FAC6 9081 UNKNOWN, // 1FAC7 9082 COMMON, // 1FAC8 9083 UNKNOWN, // 1FAC9..1FACC 9084 COMMON, // 1FACD..1FADC 9085 UNKNOWN, // 1FADD..1FADE 9086 COMMON, // 1FADF..1FAEA 9087 UNKNOWN, // 1FAEB..1FAEE 9088 COMMON, // 1FAEF..1FAF8 9089 UNKNOWN, // 1FAF9..1FAFF 9090 COMMON, // 1FB00..1FB92 9091 UNKNOWN, // 1FB93 9092 COMMON, // 1FB94..1FBFA 9093 UNKNOWN, // 1FBFB..1FFFF 9094 HAN, // 20000..2A6DF 9095 UNKNOWN, // 2A6E0..2A6FF 9096 HAN, // 2A700..2B81D 9097 UNKNOWN, // 2B81E..2B81F 9098 HAN, // 2B820..2CEAD 9099 
UNKNOWN, // 2CEAE..2CEAF 9100 HAN, // 2CEB0..2EBE0 9101 UNKNOWN, // 2EBE1..2EBEF 9102 HAN, // 2EBF0..2EE5D 9103 UNKNOWN, // 2EE5E..2F7FF 9104 HAN, // 2F800..2FA1D 9105 UNKNOWN, // 2FA1E..2FFFF 9106 HAN, // 30000..3134A 9107 UNKNOWN, // 3134B..3134F 9108 HAN, // 31350..33479 9109 UNKNOWN, // 3347A..E0000 9110 COMMON, // E0001 9111 UNKNOWN, // E0002..E001F 9112 COMMON, // E0020..E007F 9113 UNKNOWN, // E0080..E00FF 9114 INHERITED, // E0100..E01EF 9115 UNKNOWN, // E01F0..10FFFF 9116 }; 9117 9118 private static final HashMap<String, Character.UnicodeScript> aliases; 9119 static { 9120 aliases = HashMap.newHashMap(UNKNOWN.ordinal() + 1); 9121 aliases.put("ADLM", ADLAM); 9122 aliases.put("AGHB", CAUCASIAN_ALBANIAN); 9123 aliases.put("AHOM", AHOM); 9124 aliases.put("ARAB", ARABIC); 9125 aliases.put("ARMI", IMPERIAL_ARAMAIC); 9126 aliases.put("ARMN", ARMENIAN); 9127 aliases.put("AVST", AVESTAN); 9128 aliases.put("BALI", BALINESE); 9129 aliases.put("BAMU", BAMUM); 9130 aliases.put("BASS", BASSA_VAH); 9131 aliases.put("BATK", BATAK); 9132 aliases.put("BENG", BENGALI); 9133 aliases.put("BERF", BERIA_ERFE); 9134 aliases.put("BHKS", BHAIKSUKI); 9135 aliases.put("BOPO", BOPOMOFO); 9136 aliases.put("BRAH", BRAHMI); 9137 aliases.put("BRAI", BRAILLE); 9138 aliases.put("BUGI", BUGINESE); 9139 aliases.put("BUHD", BUHID); 9140 aliases.put("CAKM", CHAKMA); 9141 aliases.put("CANS", CANADIAN_ABORIGINAL); 9142 aliases.put("CARI", CARIAN); 9143 aliases.put("CHAM", CHAM); 9144 aliases.put("CHER", CHEROKEE); 9145 aliases.put("CHRS", CHORASMIAN); 9146 aliases.put("COPT", COPTIC); 9147 aliases.put("CPMN", CYPRO_MINOAN); 9148 aliases.put("CPRT", CYPRIOT); 9149 aliases.put("CYRL", CYRILLIC); 9150 aliases.put("DEVA", DEVANAGARI); 9151 aliases.put("DIAK", DIVES_AKURU); 9152 aliases.put("DOGR", DOGRA); 9153 aliases.put("DSRT", DESERET); 9154 aliases.put("DUPL", DUPLOYAN); 9155 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 9156 aliases.put("ELBA", ELBASAN); 9157 aliases.put("ELYM", ELYMAIC); 9158 
aliases.put("ETHI", ETHIOPIC); 9159 aliases.put("GARA", GARAY); 9160 aliases.put("GEOR", GEORGIAN); 9161 aliases.put("GLAG", GLAGOLITIC); 9162 aliases.put("GONG", GUNJALA_GONDI); 9163 aliases.put("GONM", MASARAM_GONDI); 9164 aliases.put("GOTH", GOTHIC); 9165 aliases.put("GRAN", GRANTHA); 9166 aliases.put("GREK", GREEK); 9167 aliases.put("GUJR", GUJARATI); 9168 aliases.put("GUKH", GURUNG_KHEMA); 9169 aliases.put("GURU", GURMUKHI); 9170 aliases.put("HANG", HANGUL); 9171 aliases.put("HANI", HAN); 9172 aliases.put("HANO", HANUNOO); 9173 aliases.put("HATR", HATRAN); 9174 aliases.put("HEBR", HEBREW); 9175 aliases.put("HIRA", HIRAGANA); 9176 aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS); 9177 aliases.put("HMNG", PAHAWH_HMONG); 9178 aliases.put("HMNP", NYIAKENG_PUACHUE_HMONG); 9179 aliases.put("HUNG", OLD_HUNGARIAN); 9180 aliases.put("ITAL", OLD_ITALIC); 9181 aliases.put("JAVA", JAVANESE); 9182 aliases.put("KALI", KAYAH_LI); 9183 aliases.put("KANA", KATAKANA); 9184 aliases.put("KAWI", KAWI); 9185 aliases.put("KHAR", KHAROSHTHI); 9186 aliases.put("KHMR", KHMER); 9187 aliases.put("KHOJ", KHOJKI); 9188 aliases.put("KITS", KHITAN_SMALL_SCRIPT); 9189 aliases.put("KNDA", KANNADA); 9190 aliases.put("KRAI", KIRAT_RAI); 9191 aliases.put("KTHI", KAITHI); 9192 aliases.put("LANA", TAI_THAM); 9193 aliases.put("LAOO", LAO); 9194 aliases.put("LATN", LATIN); 9195 aliases.put("LEPC", LEPCHA); 9196 aliases.put("LIMB", LIMBU); 9197 aliases.put("LINA", LINEAR_A); 9198 aliases.put("LINB", LINEAR_B); 9199 aliases.put("LISU", LISU); 9200 aliases.put("LYCI", LYCIAN); 9201 aliases.put("LYDI", LYDIAN); 9202 aliases.put("MAHJ", MAHAJANI); 9203 aliases.put("MAKA", MAKASAR); 9204 aliases.put("MAND", MANDAIC); 9205 aliases.put("MANI", MANICHAEAN); 9206 aliases.put("MARC", MARCHEN); 9207 aliases.put("MEDF", MEDEFAIDRIN); 9208 aliases.put("MEND", MENDE_KIKAKUI); 9209 aliases.put("MERC", MEROITIC_CURSIVE); 9210 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 9211 aliases.put("MLYM", MALAYALAM); 9212 
aliases.put("MODI", MODI); 9213 aliases.put("MONG", MONGOLIAN); 9214 aliases.put("MROO", MRO); 9215 aliases.put("MTEI", MEETEI_MAYEK); 9216 aliases.put("MULT", MULTANI); 9217 aliases.put("MYMR", MYANMAR); 9218 aliases.put("NAGM", NAG_MUNDARI); 9219 aliases.put("NAND", NANDINAGARI); 9220 aliases.put("NARB", OLD_NORTH_ARABIAN); 9221 aliases.put("NBAT", NABATAEAN); 9222 aliases.put("NEWA", NEWA); 9223 aliases.put("NKOO", NKO); 9224 aliases.put("NSHU", NUSHU); 9225 aliases.put("OGAM", OGHAM); 9226 aliases.put("OLCK", OL_CHIKI); 9227 aliases.put("ONAO", OL_ONAL); 9228 aliases.put("ORKH", OLD_TURKIC); 9229 aliases.put("ORYA", ORIYA); 9230 aliases.put("OSGE", OSAGE); 9231 aliases.put("OSMA", OSMANYA); 9232 aliases.put("OUGR", OLD_UYGHUR); 9233 aliases.put("PALM", PALMYRENE); 9234 aliases.put("PAUC", PAU_CIN_HAU); 9235 aliases.put("PERM", OLD_PERMIC); 9236 aliases.put("PHAG", PHAGS_PA); 9237 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 9238 aliases.put("PHLP", PSALTER_PAHLAVI); 9239 aliases.put("PHNX", PHOENICIAN); 9240 aliases.put("PLRD", MIAO); 9241 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 9242 aliases.put("RJNG", REJANG); 9243 aliases.put("ROHG", HANIFI_ROHINGYA); 9244 aliases.put("RUNR", RUNIC); 9245 aliases.put("SAMR", SAMARITAN); 9246 aliases.put("SARB", OLD_SOUTH_ARABIAN); 9247 aliases.put("SAUR", SAURASHTRA); 9248 aliases.put("SGNW", SIGNWRITING); 9249 aliases.put("SHAW", SHAVIAN); 9250 aliases.put("SHRD", SHARADA); 9251 aliases.put("SIDD", SIDDHAM); 9252 aliases.put("SIDT", SIDETIC); 9253 aliases.put("SIND", KHUDAWADI); 9254 aliases.put("SINH", SINHALA); 9255 aliases.put("SOGD", SOGDIAN); 9256 aliases.put("SOGO", OLD_SOGDIAN); 9257 aliases.put("SORA", SORA_SOMPENG); 9258 aliases.put("SOYO", SOYOMBO); 9259 aliases.put("SUND", SUNDANESE); 9260 aliases.put("SUNU", SUNUWAR); 9261 aliases.put("SYLO", SYLOTI_NAGRI); 9262 aliases.put("SYRC", SYRIAC); 9263 aliases.put("TAGB", TAGBANWA); 9264 aliases.put("TAKR", TAKRI); 9265 aliases.put("TALE", TAI_LE); 9266 
aliases.put("TALU", NEW_TAI_LUE); 9267 aliases.put("TAML", TAMIL); 9268 aliases.put("TANG", TANGUT); 9269 aliases.put("TAVT", TAI_VIET); 9270 aliases.put("TAYO", TAI_YO); 9271 aliases.put("TELU", TELUGU); 9272 aliases.put("TFNG", TIFINAGH); 9273 aliases.put("TGLG", TAGALOG); 9274 aliases.put("THAA", THAANA); 9275 aliases.put("THAI", THAI); 9276 aliases.put("TIBT", TIBETAN); 9277 aliases.put("TIRH", TIRHUTA); 9278 aliases.put("TNSA", TANGSA); 9279 aliases.put("TODR", TODHRI); 9280 aliases.put("TOLS", TOLONG_SIKI); 9281 aliases.put("TOTO", TOTO); 9282 aliases.put("TUTG", TULU_TIGALARI); 9283 aliases.put("UGAR", UGARITIC); 9284 aliases.put("VAII", VAI); 9285 aliases.put("VITH", VITHKUQI); 9286 aliases.put("WARA", WARANG_CITI); 9287 aliases.put("WCHO", WANCHO); 9288 aliases.put("XPEO", OLD_PERSIAN); 9289 aliases.put("XSUX", CUNEIFORM); 9290 aliases.put("YEZI", YEZIDI); 9291 aliases.put("YIII", YI); 9292 aliases.put("ZANB", ZANABAZAR_SQUARE); 9293 aliases.put("ZINH", INHERITED); 9294 aliases.put("ZYYY", COMMON); 9295 aliases.put("ZZZZ", UNKNOWN); 9296 } 9297 9298 /** 9299 * Returns the enum constant representing the Unicode script of which 9300 * the given character (Unicode code point) is assigned to. 9301 * 9302 * @param codePoint the character (Unicode code point) in question. 9303 * @return The {@code UnicodeScript} constant representing the 9304 * Unicode script of which this character is assigned to. 9305 * 9306 * @throws IllegalArgumentException if the specified 9307 * {@code codePoint} is an invalid Unicode code point. 
9308 * @see Character#isValidCodePoint(int) 9309 * 9310 */ 9311 public static UnicodeScript of(int codePoint) { 9312 if (!isValidCodePoint(codePoint)) 9313 throw new IllegalArgumentException( 9314 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9315 int type = getType(codePoint); 9316 // leave SURROGATE and PRIVATE_USE for table lookup 9317 if (type == UNASSIGNED) 9318 return UNKNOWN; 9319 int index = Arrays.binarySearch(scriptStarts, codePoint); 9320 if (index < 0) 9321 index = -index - 2; 9322 return scripts[index]; 9323 } 9324 9325 /** 9326 * Returns the UnicodeScript constant with the given Unicode script 9327 * name or the script name alias. Script names and their aliases are 9328 * determined by The Unicode Standard. The files {@code Scripts.txt} 9329 * and {@code PropertyValueAliases.txt} define script names 9330 * and the script name aliases for a particular version of the 9331 * standard. The {@link Character} class specifies the version of 9332 * the standard that it supports. 9333 * <p> 9334 * Character case is ignored for all of the valid script names. 9335 * The en_US locale's case mapping rules are used to provide 9336 * case-insensitive string comparisons for script name validation. 9337 * 9338 * @param scriptName A {@code UnicodeScript} name. 9339 * @return The {@code UnicodeScript} constant identified 9340 * by {@code scriptName} 9341 * @throws IllegalArgumentException if {@code scriptName} is an 9342 * invalid name 9343 * @throws NullPointerException if {@code scriptName} is null 9344 */ 9345 public static final UnicodeScript forName(String scriptName) { 9346 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 9347 //.replace(' ', '_')); 9348 UnicodeScript sc = aliases.get(scriptName); 9349 if (sc != null) 9350 return sc; 9351 return valueOf(scriptName); 9352 } 9353 } 9354 9355 /** 9356 * The value of the {@code Character}. 
9357 * 9358 * @serial 9359 */ 9360 private final char value; 9361 9362 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 9363 @java.io.Serial 9364 private static final long serialVersionUID = 3786198910865385080L; 9365 9366 /** 9367 * Constructs a newly allocated {@code Character} object that 9368 * represents the specified {@code char} value. 9369 * 9370 * @param value the value to be represented by the 9371 * {@code Character} object. 9372 * 9373 * @deprecated 9374 * It is rarely appropriate to use this constructor. The static factory 9375 * {@link #valueOf(char)} is generally a better choice, as it is 9376 * likely to yield significantly better space and time performance. 9377 */ 9378 @Deprecated(since="9") 9379 public Character(char value) { 9380 this.value = value; 9381 } 9382 9383 @AOTSafeClassInitializer 9384 private static final class CharacterCache { 9385 private CharacterCache(){} 9386 9387 @Stable 9388 static final Character[] cache; 9389 static Character[] archivedCache; 9390 9391 static { 9392 int size = 127 + 1; 9393 9394 // Load and use the archived cache if it exists 9395 CDS.initializeFromArchive(CharacterCache.class); 9396 if (archivedCache == null) { 9397 Character[] c = new Character[size]; 9398 for (int i = 0; i < size; i++) { 9399 c[i] = new Character((char) i); 9400 } 9401 archivedCache = c; 9402 } 9403 cache = archivedCache; 9404 assert cache.length == size; 9405 } 9406 } 9407 9408 /** 9409 * Returns a {@code Character} instance representing the specified 9410 * {@code char} value. 9411 * If a new {@code Character} instance is not required, this method 9412 * should generally be used in preference to the constructor 9413 * {@link #Character(char)}, as this method is likely to yield 9414 * significantly better space and time performance by caching 9415 * frequently requested values. 
9416 * 9417 * This method will always cache values in the range {@code 9418 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 9419 * cache other values outside of this range. 9420 * 9421 * @param c a char value. 9422 * @return a {@code Character} instance representing {@code c}. 9423 * @since 1.5 9424 */ 9425 @IntrinsicCandidate 9426 public static Character valueOf(char c) { 9427 if (c <= 127) { // must cache 9428 return CharacterCache.cache[(int)c]; 9429 } 9430 return new Character(c); 9431 } 9432 9433 /** 9434 * Returns the value of this {@code Character} object. 9435 * @return the primitive {@code char} value represented by 9436 * this object. 9437 */ 9438 @IntrinsicCandidate 9439 public char charValue() { 9440 return value; 9441 } 9442 9443 /** 9444 * Returns a hash code for this {@code Character}; equal to the result 9445 * of invoking {@code charValue()}. 9446 * 9447 * @return a hash code value for this {@code Character} 9448 */ 9449 @Override 9450 public int hashCode() { 9451 return Character.hashCode(value); 9452 } 9453 9454 /** 9455 * Returns a hash code for a {@code char} value; compatible with 9456 * {@code Character.hashCode()}. 9457 * 9458 * @since 1.8 9459 * 9460 * @param value The {@code char} for which to return a hash code. 9461 * @return a hash code value for a {@code char} value. 9462 */ 9463 public static int hashCode(char value) { 9464 return (int)value; 9465 } 9466 9467 /** 9468 * Compares this object against the specified object. 9469 * The result is {@code true} if and only if the argument is not 9470 * {@code null} and is a {@code Character} object that 9471 * represents the same {@code char} value as this object. 9472 * 9473 * @param obj the object to compare with. 9474 * @return {@code true} if the objects are the same; 9475 * {@code false} otherwise. 
9476 */ 9477 public boolean equals(Object obj) { 9478 if (obj instanceof Character c) { 9479 return value == c.charValue(); 9480 } 9481 return false; 9482 } 9483 9484 /** 9485 * Returns a {@code String} object representing this 9486 * {@code Character}'s value. The result is a string of 9487 * length 1 whose sole component is the primitive 9488 * {@code char} value represented by this 9489 * {@code Character} object. 9490 * 9491 * @return a string representation of this object. 9492 */ 9493 @Override 9494 public String toString() { 9495 return String.valueOf(value); 9496 } 9497 9498 /** 9499 * Returns a {@code String} object representing the 9500 * specified {@code char}. The result is a string of length 9501 * 1 consisting solely of the specified {@code char}. 9502 * 9503 * @apiNote This method cannot handle <a 9504 * href="#supplementary"> supplementary characters</a>. To support 9505 * all Unicode characters, including supplementary characters, use 9506 * the {@link #toString(int)} method. 9507 * 9508 * @param c the {@code char} to be converted 9509 * @return the string representation of the specified {@code char} 9510 * @since 1.4 9511 */ 9512 public static String toString(char c) { 9513 return String.valueOf(c); 9514 } 9515 9516 /** 9517 * Returns a {@code String} object representing the 9518 * specified character (Unicode code point). The result is a string of 9519 * length 1 or 2, consisting solely of the specified {@code codePoint}. 9520 * 9521 * @param codePoint the {@code codePoint} to be converted 9522 * @return the string representation of the specified {@code codePoint} 9523 * @throws IllegalArgumentException if the specified 9524 * {@code codePoint} is not a {@linkplain #isValidCodePoint 9525 * valid Unicode code point}. 
9526 * @since 11 9527 */ 9528 public static String toString(int codePoint) { 9529 return String.valueOfCodePoint(codePoint); 9530 } 9531 9532 /** 9533 * Determines whether the specified code point is a valid 9534 * <a href="http://www.unicode.org/glossary/#code_point"> 9535 * Unicode code point value</a>. 9536 * 9537 * @param codePoint the Unicode code point to be tested 9538 * @return {@code true} if the specified code point value is between 9539 * {@link #MIN_CODE_POINT} and 9540 * {@link #MAX_CODE_POINT} inclusive; 9541 * {@code false} otherwise. 9542 * @since 1.5 9543 */ 9544 public static boolean isValidCodePoint(int codePoint) { 9545 // Optimized form of: 9546 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 9547 int plane = codePoint >>> 16; 9548 return plane < ((MAX_CODE_POINT + 1) >>> 16); 9549 } 9550 9551 /** 9552 * Determines whether the specified character (Unicode code point) 9553 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 9554 * Such code points can be represented using a single {@code char}. 9555 * 9556 * @param codePoint the character (Unicode code point) to be tested 9557 * @return {@code true} if the specified code point is between 9558 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 9559 * {@code false} otherwise. 9560 * @since 1.7 9561 */ 9562 public static boolean isBmpCodePoint(int codePoint) { 9563 return codePoint >>> 16 == 0; 9564 // Optimized form of: 9565 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 9566 // We consistently use logical shift (>>>) to facilitate 9567 // additional runtime optimizations. 9568 } 9569 9570 /** 9571 * Determines whether the specified character (Unicode code point) 9572 * is in the <a href="#supplementary">supplementary character</a> range. 
9573 * 9574 * @param codePoint the character (Unicode code point) to be tested 9575 * @return {@code true} if the specified code point is between 9576 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 9577 * {@link #MAX_CODE_POINT} inclusive; 9578 * {@code false} otherwise. 9579 * @since 1.5 9580 */ 9581 public static boolean isSupplementaryCodePoint(int codePoint) { 9582 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 9583 && codePoint < MAX_CODE_POINT + 1; 9584 } 9585 9586 /** 9587 * Determines if the given {@code char} value is a 9588 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9589 * Unicode high-surrogate code unit</a> 9590 * (also known as <i>leading-surrogate code unit</i>). 9591 * 9592 * <p>Such values do not represent characters by themselves, 9593 * but are used in the representation of 9594 * <a href="#supplementary">supplementary characters</a> 9595 * in the UTF-16 encoding. 9596 * 9597 * @param ch the {@code char} value to be tested. 9598 * @return {@code true} if the {@code char} value is between 9599 * {@link #MIN_HIGH_SURROGATE} and 9600 * {@link #MAX_HIGH_SURROGATE} inclusive; 9601 * {@code false} otherwise. 9602 * @see Character#isLowSurrogate(char) 9603 * @see Character.UnicodeBlock#of(int) 9604 * @since 1.5 9605 */ 9606 public static boolean isHighSurrogate(char ch) { 9607 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 9608 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 9609 } 9610 9611 /** 9612 * Determines if the given {@code char} value is a 9613 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9614 * Unicode low-surrogate code unit</a> 9615 * (also known as <i>trailing-surrogate code unit</i>). 9616 * 9617 * <p>Such values do not represent characters by themselves, 9618 * but are used in the representation of 9619 * <a href="#supplementary">supplementary characters</a> 9620 * in the UTF-16 encoding. 
9621 * 9622 * @param ch the {@code char} value to be tested. 9623 * @return {@code true} if the {@code char} value is between 9624 * {@link #MIN_LOW_SURROGATE} and 9625 * {@link #MAX_LOW_SURROGATE} inclusive; 9626 * {@code false} otherwise. 9627 * @see Character#isHighSurrogate(char) 9628 * @since 1.5 9629 */ 9630 public static boolean isLowSurrogate(char ch) { 9631 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 9632 } 9633 9634 /** 9635 * Determines if the given {@code char} value is a Unicode 9636 * <i>surrogate code unit</i>. 9637 * 9638 * <p>Such values do not represent characters by themselves, 9639 * but are used in the representation of 9640 * <a href="#supplementary">supplementary characters</a> 9641 * in the UTF-16 encoding. 9642 * 9643 * <p>A char value is a surrogate code unit if and only if it is either 9644 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 9645 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 9646 * 9647 * @param ch the {@code char} value to be tested. 9648 * @return {@code true} if the {@code char} value is between 9649 * {@link #MIN_SURROGATE} and 9650 * {@link #MAX_SURROGATE} inclusive; 9651 * {@code false} otherwise. 9652 * @since 1.7 9653 */ 9654 public static boolean isSurrogate(char ch) { 9655 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 9656 } 9657 9658 /** 9659 * Determines whether the specified pair of {@code char} 9660 * values is a valid 9661 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9662 * Unicode surrogate pair</a>. 
9663 * 9664 * <p>This method is equivalent to the expression: 9665 * <blockquote><pre>{@code 9666 * isHighSurrogate(high) && isLowSurrogate(low) 9667 * }</pre></blockquote> 9668 * 9669 * @param high the high-surrogate code value to be tested 9670 * @param low the low-surrogate code value to be tested 9671 * @return {@code true} if the specified high and 9672 * low-surrogate code values represent a valid surrogate pair; 9673 * {@code false} otherwise. 9674 * @since 1.5 9675 */ 9676 public static boolean isSurrogatePair(char high, char low) { 9677 return isHighSurrogate(high) && isLowSurrogate(low); 9678 } 9679 9680 /** 9681 * Determines the number of {@code char} values needed to 9682 * represent the specified character (Unicode code point). If the 9683 * specified character is equal to or greater than 0x10000, then 9684 * the method returns 2. Otherwise, the method returns 1. 9685 * 9686 * <p>This method doesn't validate the specified character to be a 9687 * valid Unicode code point. The caller must validate the 9688 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 9689 * if necessary. 9690 * 9691 * @param codePoint the character (Unicode code point) to be tested. 9692 * @return 2 if the character is a valid supplementary character; 1 otherwise. 9693 * @see Character#isSupplementaryCodePoint(int) 9694 * @since 1.5 9695 */ 9696 public static int charCount(int codePoint) { 9697 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 9698 } 9699 9700 /** 9701 * Converts the specified surrogate pair to its supplementary code 9702 * point value. This method does not validate the specified 9703 * surrogate pair. The caller must validate it using {@link 9704 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 9705 * 9706 * @param high the high-surrogate code unit 9707 * @param low the low-surrogate code unit 9708 * @return the supplementary code point composed from the 9709 * specified surrogate pair. 
9710 * @since 1.5 9711 */ 9712 public static int toCodePoint(char high, char low) { 9713 // Optimized form of: 9714 // return ((high - MIN_HIGH_SURROGATE) << 10) 9715 // + (low - MIN_LOW_SURROGATE) 9716 // + MIN_SUPPLEMENTARY_CODE_POINT; 9717 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 9718 - (MIN_HIGH_SURROGATE << 10) 9719 - MIN_LOW_SURROGATE); 9720 } 9721 9722 /** 9723 * Returns the code point at the given index of the 9724 * {@code CharSequence}. If the {@code char} value at 9725 * the given index in the {@code CharSequence} is in the 9726 * high-surrogate range, the following index is less than the 9727 * length of the {@code CharSequence}, and the 9728 * {@code char} value at the following index is in the 9729 * low-surrogate range, then the supplementary code point 9730 * corresponding to this surrogate pair is returned. Otherwise, 9731 * the {@code char} value at the given index is returned. 9732 * 9733 * @param seq a sequence of {@code char} values (Unicode code 9734 * units) 9735 * @param index the index to the {@code char} values (Unicode 9736 * code units) in {@code seq} to be converted 9737 * @return the Unicode code point at the given index 9738 * @throws NullPointerException if {@code seq} is null. 9739 * @throws IndexOutOfBoundsException if the value 9740 * {@code index} is negative or not less than 9741 * {@link CharSequence#length() seq.length()}. 9742 * @since 1.5 9743 */ 9744 public static int codePointAt(CharSequence seq, int index) { 9745 char c1 = seq.charAt(index); 9746 if (isHighSurrogate(c1) && ++index < seq.length()) { 9747 char c2 = seq.charAt(index); 9748 if (isLowSurrogate(c2)) { 9749 return toCodePoint(c1, c2); 9750 } 9751 } 9752 return c1; 9753 } 9754 9755 /** 9756 * Returns the code point at the given index of the 9757 * {@code char} array. 
If the {@code char} value at 9758 * the given index in the {@code char} array is in the 9759 * high-surrogate range, the following index is less than the 9760 * length of the {@code char} array, and the 9761 * {@code char} value at the following index is in the 9762 * low-surrogate range, then the supplementary code point 9763 * corresponding to this surrogate pair is returned. Otherwise, 9764 * the {@code char} value at the given index is returned. 9765 * 9766 * @param a the {@code char} array 9767 * @param index the index to the {@code char} values (Unicode 9768 * code units) in the {@code char} array to be converted 9769 * @return the Unicode code point at the given index 9770 * @throws NullPointerException if {@code a} is null. 9771 * @throws IndexOutOfBoundsException if the value 9772 * {@code index} is negative or not less than 9773 * the length of the {@code char} array. 9774 * @since 1.5 9775 */ 9776 public static int codePointAt(char[] a, int index) { 9777 return codePointAtImpl(a, index, a.length); 9778 } 9779 9780 /** 9781 * Returns the code point at the given index of the 9782 * {@code char} array, where only array elements with 9783 * {@code index} less than {@code limit} can be used. If 9784 * the {@code char} value at the given index in the 9785 * {@code char} array is in the high-surrogate range, the 9786 * following index is less than the {@code limit}, and the 9787 * {@code char} value at the following index is in the 9788 * low-surrogate range, then the supplementary code point 9789 * corresponding to this surrogate pair is returned. Otherwise, 9790 * the {@code char} value at the given index is returned. 
9791 * 9792 * @param a the {@code char} array 9793 * @param index the index to the {@code char} values (Unicode 9794 * code units) in the {@code char} array to be converted 9795 * @param limit the index after the last array element that 9796 * can be used in the {@code char} array 9797 * @return the Unicode code point at the given index 9798 * @throws NullPointerException if {@code a} is null. 9799 * @throws IndexOutOfBoundsException if the {@code index} 9800 * argument is negative or not less than the {@code limit} 9801 * argument, or if the {@code limit} argument is negative or 9802 * greater than the length of the {@code char} array. 9803 * @since 1.5 9804 */ 9805 public static int codePointAt(char[] a, int index, int limit) { 9806 if (index >= limit || index < 0 || limit > a.length) { 9807 throw new IndexOutOfBoundsException(); 9808 } 9809 return codePointAtImpl(a, index, limit); 9810 } 9811 9812 // throws ArrayIndexOutOfBoundsException if index out of bounds 9813 static int codePointAtImpl(char[] a, int index, int limit) { 9814 char c1 = a[index]; 9815 if (isHighSurrogate(c1) && ++index < limit) { 9816 char c2 = a[index]; 9817 if (isLowSurrogate(c2)) { 9818 return toCodePoint(c1, c2); 9819 } 9820 } 9821 return c1; 9822 } 9823 9824 /** 9825 * Returns the code point preceding the given index of the 9826 * {@code CharSequence}. If the {@code char} value at 9827 * {@code (index - 1)} in the {@code CharSequence} is in 9828 * the low-surrogate range, {@code (index - 2)} is not 9829 * negative, and the {@code char} value at {@code (index - 2)} 9830 * in the {@code CharSequence} is in the 9831 * high-surrogate range, then the supplementary code point 9832 * corresponding to this surrogate pair is returned. Otherwise, 9833 * the {@code char} value at {@code (index - 1)} is 9834 * returned. 
9835 * 9836 * @param seq the {@code CharSequence} instance 9837 * @param index the index following the code point that should be returned 9838 * @return the Unicode code point value before the given index. 9839 * @throws NullPointerException if {@code seq} is null. 9840 * @throws IndexOutOfBoundsException if the {@code index} 9841 * argument is less than 1 or greater than {@link 9842 * CharSequence#length() seq.length()}. 9843 * @since 1.5 9844 */ 9845 public static int codePointBefore(CharSequence seq, int index) { 9846 char c2 = seq.charAt(--index); 9847 if (isLowSurrogate(c2) && index > 0) { 9848 char c1 = seq.charAt(--index); 9849 if (isHighSurrogate(c1)) { 9850 return toCodePoint(c1, c2); 9851 } 9852 } 9853 return c2; 9854 } 9855 9856 /** 9857 * Returns the code point preceding the given index of the 9858 * {@code char} array. If the {@code char} value at 9859 * {@code (index - 1)} in the {@code char} array is in 9860 * the low-surrogate range, {@code (index - 2)} is not 9861 * negative, and the {@code char} value at {@code (index - 2)} 9862 * in the {@code char} array is in the 9863 * high-surrogate range, then the supplementary code point 9864 * corresponding to this surrogate pair is returned. Otherwise, 9865 * the {@code char} value at {@code (index - 1)} is 9866 * returned. 9867 * 9868 * @param a the {@code char} array 9869 * @param index the index following the code point that should be returned 9870 * @return the Unicode code point value before the given index. 9871 * @throws NullPointerException if {@code a} is null. 
9872 * @throws IndexOutOfBoundsException if the {@code index} 9873 * argument is less than 1 or greater than the length of the 9874 * {@code char} array 9875 * @since 1.5 9876 */ 9877 public static int codePointBefore(char[] a, int index) { 9878 return codePointBeforeImpl(a, index, 0); 9879 } 9880 9881 /** 9882 * Returns the code point preceding the given index of the 9883 * {@code char} array, where only array elements with 9884 * {@code index} greater than or equal to {@code start} 9885 * can be used. If the {@code char} value at {@code (index - 1)} 9886 * in the {@code char} array is in the 9887 * low-surrogate range, {@code (index - 2)} is not less than 9888 * {@code start}, and the {@code char} value at 9889 * {@code (index - 2)} in the {@code char} array is in 9890 * the high-surrogate range, then the supplementary code point 9891 * corresponding to this surrogate pair is returned. Otherwise, 9892 * the {@code char} value at {@code (index - 1)} is 9893 * returned. 9894 * 9895 * @param a the {@code char} array 9896 * @param index the index following the code point that should be returned 9897 * @param start the index of the first array element in the 9898 * {@code char} array 9899 * @return the Unicode code point value before the given index. 9900 * @throws NullPointerException if {@code a} is null. 9901 * @throws IndexOutOfBoundsException if the {@code index} 9902 * argument is not greater than the {@code start} argument or 9903 * is greater than the length of the {@code char} array, or 9904 * if the {@code start} argument is negative or not less than 9905 * the length of the {@code char} array. 
9906 * @since 1.5 9907 */ 9908 public static int codePointBefore(char[] a, int index, int start) { 9909 if (index <= start || start < 0 || index > a.length) { 9910 throw new IndexOutOfBoundsException(); 9911 } 9912 return codePointBeforeImpl(a, index, start); 9913 } 9914 9915 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds 9916 static int codePointBeforeImpl(char[] a, int index, int start) { 9917 char c2 = a[--index]; 9918 if (isLowSurrogate(c2) && index > start) { 9919 char c1 = a[--index]; 9920 if (isHighSurrogate(c1)) { 9921 return toCodePoint(c1, c2); 9922 } 9923 } 9924 return c2; 9925 } 9926 9927 /** 9928 * Returns the leading surrogate (a 9929 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9930 * high surrogate code unit</a>) of the 9931 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9932 * surrogate pair</a> 9933 * representing the specified supplementary character (Unicode 9934 * code point) in the UTF-16 encoding. If the specified character 9935 * is not a 9936 * <a href="Character.html#supplementary">supplementary character</a>, 9937 * an unspecified {@code char} is returned. 9938 * 9939 * <p>If 9940 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9941 * is {@code true}, then 9942 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 9943 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 9944 * are also always {@code true}. 
9945 * 9946 * @param codePoint a supplementary character (Unicode code point) 9947 * @return the leading surrogate code unit used to represent the 9948 * character in the UTF-16 encoding 9949 * @since 1.7 9950 */ 9951 public static char highSurrogate(int codePoint) { 9952 return (char) ((codePoint >>> 10) 9953 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 9954 } 9955 9956 /** 9957 * Returns the trailing surrogate (a 9958 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9959 * low surrogate code unit</a>) of the 9960 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9961 * surrogate pair</a> 9962 * representing the specified supplementary character (Unicode 9963 * code point) in the UTF-16 encoding. If the specified character 9964 * is not a 9965 * <a href="Character.html#supplementary">supplementary character</a>, 9966 * an unspecified {@code char} is returned. 9967 * 9968 * <p>If 9969 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9970 * is {@code true}, then 9971 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 9972 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 9973 * are also always {@code true}. 9974 * 9975 * @param codePoint a supplementary character (Unicode code point) 9976 * @return the trailing surrogate code unit used to represent the 9977 * character in the UTF-16 encoding 9978 * @since 1.7 9979 */ 9980 public static char lowSurrogate(int codePoint) { 9981 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 9982 } 9983 9984 /** 9985 * Converts the specified character (Unicode code point) to its 9986 * UTF-16 representation. If the specified code point is a BMP 9987 * (Basic Multilingual Plane or Plane 0) value, the same value is 9988 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 9989 * specified code point is a supplementary character, its 9990 * surrogate values are stored in {@code dst[dstIndex]} 9991 * (high-surrogate) and {@code dst[dstIndex+1]} 9992 * (low-surrogate), and 2 is returned. 9993 * 9994 * @param codePoint the character (Unicode code point) to be converted. 9995 * @param dst an array of {@code char} in which the 9996 * {@code codePoint}'s UTF-16 value is stored. 9997 * @param dstIndex the start index into the {@code dst} 9998 * array where the converted value is stored. 9999 * @return 1 if the code point is a BMP code point, 2 if the 10000 * code point is a supplementary code point. 10001 * @throws IllegalArgumentException if the specified 10002 * {@code codePoint} is not a valid Unicode code point. 10003 * @throws NullPointerException if the specified {@code dst} is null. 10004 * @throws IndexOutOfBoundsException if {@code dstIndex} 10005 * is negative or not less than {@code dst.length}, or if 10006 * {@code dst} at {@code dstIndex} doesn't have enough 10007 * array element(s) to store the resulting {@code char} 10008 * value(s). (If {@code dstIndex} is equal to 10009 * {@code dst.length-1} and the specified 10010 * {@code codePoint} is a supplementary character, the 10011 * high-surrogate value is not stored in 10012 * {@code dst[dstIndex]}.) 10013 * @since 1.5 10014 */ 10015 public static int toChars(int codePoint, char[] dst, int dstIndex) { 10016 if (isBmpCodePoint(codePoint)) { 10017 dst[dstIndex] = (char) codePoint; 10018 return 1; 10019 } else if (isValidCodePoint(codePoint)) { 10020 toSurrogates(codePoint, dst, dstIndex); 10021 return 2; 10022 } else { 10023 throw new IllegalArgumentException( 10024 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 10025 } 10026 } 10027 10028 /** 10029 * Converts the specified character (Unicode code point) to its 10030 * UTF-16 representation stored in a {@code char} array. 
If 10031 * the specified code point is a BMP (Basic Multilingual Plane or 10032 * Plane 0) value, the resulting {@code char} array has 10033 * the same value as {@code codePoint}. If the specified code 10034 * point is a supplementary code point, the resulting 10035 * {@code char} array has the corresponding surrogate pair. 10036 * 10037 * @param codePoint a Unicode code point 10038 * @return a {@code char} array having 10039 * {@code codePoint}'s UTF-16 representation. 10040 * @throws IllegalArgumentException if the specified 10041 * {@code codePoint} is not a valid Unicode code point. 10042 * @since 1.5 10043 */ 10044 public static char[] toChars(int codePoint) { 10045 if (isBmpCodePoint(codePoint)) { 10046 return new char[] { (char) codePoint }; 10047 } else if (isValidCodePoint(codePoint)) { 10048 char[] result = new char[2]; 10049 toSurrogates(codePoint, result, 0); 10050 return result; 10051 } else { 10052 throw new IllegalArgumentException( 10053 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 10054 } 10055 } 10056 10057 static void toSurrogates(int codePoint, char[] dst, int index) { 10058 // We write elements "backwards" to guarantee all-or-nothing 10059 dst[index+1] = lowSurrogate(codePoint); 10060 dst[index] = highSurrogate(codePoint); 10061 } 10062 10063 /** 10064 * Returns the number of Unicode code points in the text range of 10065 * the specified char sequence. The text range begins at the 10066 * specified {@code beginIndex} and extends to the 10067 * {@code char} at index {@code endIndex - 1}. Thus the 10068 * length (in {@code char}s) of the text range is 10069 * {@code endIndex-beginIndex}. Unpaired surrogates within 10070 * the text range count as one code point each. 10071 * 10072 * @param seq the char sequence 10073 * @param beginIndex the index to the first {@code char} of 10074 * the text range. 10075 * @param endIndex the index after the last {@code char} of 10076 * the text range. 
10077 * @return the number of Unicode code points in the specified text 10078 * range 10079 * @throws NullPointerException if {@code seq} is null. 10080 * @throws IndexOutOfBoundsException if the 10081 * {@code beginIndex} is negative, or {@code endIndex} 10082 * is larger than the length of the given sequence, or 10083 * {@code beginIndex} is larger than {@code endIndex}. 10084 * @since 1.5 10085 */ 10086 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 10087 Objects.checkFromToIndex(beginIndex, endIndex, seq.length()); 10088 int n = endIndex - beginIndex; 10089 for (int i = beginIndex; i < endIndex; ) { 10090 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 10091 isLowSurrogate(seq.charAt(i))) { 10092 n--; 10093 i++; 10094 } 10095 } 10096 return n; 10097 } 10098 10099 /** 10100 * Returns the number of Unicode code points in a subarray of the 10101 * {@code char} array argument. The {@code offset} 10102 * argument is the index of the first {@code char} of the 10103 * subarray and the {@code count} argument specifies the 10104 * length of the subarray in {@code char}s. Unpaired 10105 * surrogates within the subarray count as one code point each. 10106 * 10107 * @param a the {@code char} array 10108 * @param offset the index of the first {@code char} in the 10109 * given {@code char} array 10110 * @param count the length of the subarray in {@code char}s 10111 * @return the number of Unicode code points in the specified subarray 10112 * @throws NullPointerException if {@code a} is null. 10113 * @throws IndexOutOfBoundsException if {@code offset} or 10114 * {@code count} is negative, or if {@code offset + 10115 * count} is larger than the length of the given array. 
10116 * @since 1.5 10117 */ 10118 public static int codePointCount(char[] a, int offset, int count) { 10119 Objects.checkFromIndexSize(offset, count, a.length); 10120 return codePointCountImpl(a, offset, count); 10121 } 10122 10123 static int codePointCountImpl(char[] a, int offset, int count) { 10124 int endIndex = offset + count; 10125 int n = count; 10126 for (int i = offset; i < endIndex; ) { 10127 if (isHighSurrogate(a[i++]) && i < endIndex && 10128 isLowSurrogate(a[i])) { 10129 n--; 10130 i++; 10131 } 10132 } 10133 return n; 10134 } 10135 10136 /** 10137 * Returns the index within the given char sequence that is offset 10138 * from the given {@code index} by {@code codePointOffset} 10139 * code points. Unpaired surrogates within the text range given by 10140 * {@code index} and {@code codePointOffset} count as 10141 * one code point each. 10142 * 10143 * @param seq the char sequence 10144 * @param index the index to be offset 10145 * @param codePointOffset the offset in code points 10146 * @return the index within the char sequence 10147 * @throws NullPointerException if {@code seq} is null. 10148 * @throws IndexOutOfBoundsException if {@code index} 10149 * is negative or larger than the length of the char sequence, 10150 * or if {@code codePointOffset} is positive and the 10151 * subsequence starting with {@code index} has fewer than 10152 * {@code codePointOffset} code points, or if 10153 * {@code codePointOffset} is negative and the subsequence 10154 * before {@code index} has fewer than the absolute value 10155 * of {@code codePointOffset} code points. 
10156 * @since 1.5 10157 */ 10158 public static int offsetByCodePoints(CharSequence seq, int index, 10159 int codePointOffset) { 10160 int length = seq.length(); 10161 if (index < 0 || index > length) { 10162 throw new IndexOutOfBoundsException(); 10163 } 10164 10165 int x = index; 10166 if (codePointOffset >= 0) { 10167 int i; 10168 for (i = 0; x < length && i < codePointOffset; i++) { 10169 if (isHighSurrogate(seq.charAt(x++)) && x < length && 10170 isLowSurrogate(seq.charAt(x))) { 10171 x++; 10172 } 10173 } 10174 if (i < codePointOffset) { 10175 throw new IndexOutOfBoundsException(); 10176 } 10177 } else { 10178 int i; 10179 for (i = codePointOffset; x > 0 && i < 0; i++) { 10180 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 10181 isHighSurrogate(seq.charAt(x-1))) { 10182 x--; 10183 } 10184 } 10185 if (i < 0) { 10186 throw new IndexOutOfBoundsException(); 10187 } 10188 } 10189 return x; 10190 } 10191 10192 /** 10193 * Returns the index within the given {@code char} subarray 10194 * that is offset from the given {@code index} by 10195 * {@code codePointOffset} code points. The 10196 * {@code start} and {@code count} arguments specify a 10197 * subarray of the {@code char} array. Unpaired surrogates 10198 * within the text range given by {@code index} and 10199 * {@code codePointOffset} count as one code point each. 10200 * 10201 * @param a the {@code char} array 10202 * @param start the index of the first {@code char} of the 10203 * subarray 10204 * @param count the length of the subarray in {@code char}s 10205 * @param index the index to be offset 10206 * @param codePointOffset the offset in code points 10207 * @return the index within the subarray 10208 * @throws NullPointerException if {@code a} is null. 
10209 * @throws IndexOutOfBoundsException 10210 * if {@code start} or {@code count} is negative, 10211 * or if {@code start + count} is larger than the length of 10212 * the given array, 10213 * or if {@code index} is less than {@code start} or 10214 * larger then {@code start + count}, 10215 * or if {@code codePointOffset} is positive and the text range 10216 * starting with {@code index} and ending with {@code start + count - 1} 10217 * has fewer than {@code codePointOffset} code 10218 * points, 10219 * or if {@code codePointOffset} is negative and the text range 10220 * starting with {@code start} and ending with {@code index - 1} 10221 * has fewer than the absolute value of 10222 * {@code codePointOffset} code points. 10223 * @since 1.5 10224 */ 10225 public static int offsetByCodePoints(char[] a, int start, int count, 10226 int index, int codePointOffset) { 10227 if (count > a.length-start || start < 0 || count < 0 10228 || index < start || index > start+count) { 10229 throw new IndexOutOfBoundsException(); 10230 } 10231 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 10232 } 10233 10234 static int offsetByCodePointsImpl(char[]a, int start, int count, 10235 int index, int codePointOffset) { 10236 int x = index; 10237 if (codePointOffset >= 0) { 10238 int limit = start + count; 10239 int i; 10240 for (i = 0; x < limit && i < codePointOffset; i++) { 10241 if (isHighSurrogate(a[x++]) && x < limit && 10242 isLowSurrogate(a[x])) { 10243 x++; 10244 } 10245 } 10246 if (i < codePointOffset) { 10247 throw new IndexOutOfBoundsException(); 10248 } 10249 } else { 10250 int i; 10251 for (i = codePointOffset; x > start && i < 0; i++) { 10252 if (isLowSurrogate(a[--x]) && x > start && 10253 isHighSurrogate(a[x-1])) { 10254 x--; 10255 } 10256 } 10257 if (i < 0) { 10258 throw new IndexOutOfBoundsException(); 10259 } 10260 } 10261 return x; 10262 } 10263 10264 /** 10265 * Determines if the specified character is a lowercase character. 
10266 * <p> 10267 * A character is lowercase if its general category type, provided 10268 * by {@code Character.getType(ch)}, is 10269 * {@code LOWERCASE_LETTER}, or it has contributory property 10270 * Other_Lowercase as defined by the Unicode Standard. 10271 * <p> 10272 * The following are examples of lowercase characters: 10273 * <blockquote><pre> 10274 * a b c d e f g h i j k l m n o p q r s t u v w x y z 10275 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 10276 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 10277 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 10278 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 10279 * </pre></blockquote> 10280 * <p> Many other Unicode characters are lowercase too. 10281 * 10282 * <p><b>Note:</b> This method cannot handle <a 10283 * href="#supplementary"> supplementary characters</a>. To support 10284 * all Unicode characters, including supplementary characters, use 10285 * the {@link #isLowerCase(int)} method. 10286 * 10287 * @param ch the character to be tested. 10288 * @return {@code true} if the character is lowercase; 10289 * {@code false} otherwise. 10290 * @see Character#isLowerCase(char) 10291 * @see Character#isTitleCase(char) 10292 * @see Character#toLowerCase(char) 10293 * @see Character#getType(char) 10294 */ 10295 public static boolean isLowerCase(char ch) { 10296 return isLowerCase((int)ch); 10297 } 10298 10299 /** 10300 * Determines if the specified character (Unicode code point) is a 10301 * lowercase character. 10302 * <p> 10303 * A character is lowercase if its general category type, provided 10304 * by {@link Character#getType getType(codePoint)}, is 10305 * {@code LOWERCASE_LETTER}, or it has contributory property 10306 * Other_Lowercase as defined by the Unicode Standard. 
10307 * <p> 10308 * The following are examples of lowercase characters: 10309 * <blockquote><pre> 10310 * a b c d e f g h i j k l m n o p q r s t u v w x y z 10311 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 10312 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 10313 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 10314 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 10315 * </pre></blockquote> 10316 * <p> Many other Unicode characters are lowercase too. 10317 * 10318 * @param codePoint the character (Unicode code point) to be tested. 10319 * @return {@code true} if the character is lowercase; 10320 * {@code false} otherwise. 10321 * @see Character#isLowerCase(int) 10322 * @see Character#isTitleCase(int) 10323 * @see Character#toLowerCase(int) 10324 * @see Character#getType(int) 10325 * @since 1.5 10326 */ 10327 public static boolean isLowerCase(int codePoint) { 10328 return CharacterData.of(codePoint).isLowerCase(codePoint); 10329 } 10330 10331 /** 10332 * Determines if the specified character is an uppercase character. 10333 * <p> 10334 * A character is uppercase if its general category type, provided by 10335 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 10336 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 10337 * <p> 10338 * The following are examples of uppercase characters: 10339 * <blockquote><pre> 10340 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 10341 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 10342 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 10343 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 10344 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 10345 * </pre></blockquote> 10346 * <p> Many other Unicode characters are uppercase too. 
10347 * 10348 * <p><b>Note:</b> This method cannot handle <a 10349 * href="#supplementary"> supplementary characters</a>. To support 10350 * all Unicode characters, including supplementary characters, use 10351 * the {@link #isUpperCase(int)} method. 10352 * 10353 * @param ch the character to be tested. 10354 * @return {@code true} if the character is uppercase; 10355 * {@code false} otherwise. 10356 * @see Character#isLowerCase(char) 10357 * @see Character#isTitleCase(char) 10358 * @see Character#toUpperCase(char) 10359 * @see Character#getType(char) 10360 * @since 1.0 10361 */ 10362 public static boolean isUpperCase(char ch) { 10363 return isUpperCase((int)ch); 10364 } 10365 10366 /** 10367 * Determines if the specified character (Unicode code point) is an uppercase character. 10368 * <p> 10369 * A character is uppercase if its general category type, provided by 10370 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 10371 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 10372 * <p> 10373 * The following are examples of uppercase characters: 10374 * <blockquote><pre> 10375 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 10376 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 10377 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 10378 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 10379 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 10380 * </pre></blockquote> 10381 * <p> Many other Unicode characters are uppercase too. 10382 * 10383 * @param codePoint the character (Unicode code point) to be tested. 10384 * @return {@code true} if the character is uppercase; 10385 * {@code false} otherwise. 
10386 * @see Character#isLowerCase(int) 10387 * @see Character#isTitleCase(int) 10388 * @see Character#toUpperCase(int) 10389 * @see Character#getType(int) 10390 * @since 1.5 10391 */ 10392 public static boolean isUpperCase(int codePoint) { 10393 return CharacterData.of(codePoint).isUpperCase(codePoint); 10394 } 10395 10396 /** 10397 * Determines if the specified character is a titlecase character. 10398 * <p> 10399 * A character is a titlecase character if its general 10400 * category type, provided by {@code Character.getType(ch)}, 10401 * is {@code TITLECASE_LETTER}. 10402 * <p> 10403 * Some characters look like pairs of Latin letters. For example, there 10404 * is an uppercase letter that looks like "LJ" and has a corresponding 10405 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 10406 * is the appropriate form to use when rendering a word in lowercase 10407 * with initial capitals, as for a book title. 10408 * <p> 10409 * These are some of the Unicode characters for which this method returns 10410 * {@code true}: 10411 * <ul> 10412 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10413 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10414 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10415 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10416 * </ul> 10417 * <p> Many other Unicode characters are titlecase too. 10418 * 10419 * <p><b>Note:</b> This method cannot handle <a 10420 * href="#supplementary"> supplementary characters</a>. To support 10421 * all Unicode characters, including supplementary characters, use 10422 * the {@link #isTitleCase(int)} method. 10423 * 10424 * @param ch the character to be tested. 10425 * @return {@code true} if the character is titlecase; 10426 * {@code false} otherwise. 
10427 * @see Character#isLowerCase(char) 10428 * @see Character#isUpperCase(char) 10429 * @see Character#toTitleCase(char) 10430 * @see Character#getType(char) 10431 * @since 1.0.2 10432 */ 10433 public static boolean isTitleCase(char ch) { 10434 return isTitleCase((int)ch); 10435 } 10436 10437 /** 10438 * Determines if the specified character (Unicode code point) is a titlecase character. 10439 * <p> 10440 * A character is a titlecase character if its general 10441 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10442 * is {@code TITLECASE_LETTER}. 10443 * <p> 10444 * Some characters look like pairs of Latin letters. For example, there 10445 * is an uppercase letter that looks like "LJ" and has a corresponding 10446 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 10447 * is the appropriate form to use when rendering a word in lowercase 10448 * with initial capitals, as for a book title. 10449 * <p> 10450 * These are some of the Unicode characters for which this method returns 10451 * {@code true}: 10452 * <ul> 10453 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10454 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10455 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10456 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10457 * </ul> 10458 * <p> Many other Unicode characters are titlecase too. 10459 * 10460 * @param codePoint the character (Unicode code point) to be tested. 10461 * @return {@code true} if the character is titlecase; 10462 * {@code false} otherwise. 10463 * @see Character#isLowerCase(int) 10464 * @see Character#isUpperCase(int) 10465 * @see Character#toTitleCase(int) 10466 * @see Character#getType(int) 10467 * @since 1.5 10468 */ 10469 public static boolean isTitleCase(int codePoint) { 10470 return getType(codePoint) == Character.TITLECASE_LETTER; 10471 } 10472 10473 /** 10474 * Determines if the specified character is a digit. 
10475 * <p> 10476 * A character is a digit if its general category type, provided 10477 * by {@code Character.getType(ch)}, is 10478 * {@code DECIMAL_DIGIT_NUMBER}. 10479 * <p> 10480 * Some Unicode character ranges that contain digits: 10481 * <ul> 10482 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10483 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10484 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10485 * Arabic-Indic digits 10486 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10487 * Extended Arabic-Indic digits 10488 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10489 * Devanagari digits 10490 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10491 * Fullwidth digits 10492 * </ul> 10493 * 10494 * Many other character ranges contain digits as well. 10495 * 10496 * <p><b>Note:</b> This method cannot handle <a 10497 * href="#supplementary"> supplementary characters</a>. To support 10498 * all Unicode characters, including supplementary characters, use 10499 * the {@link #isDigit(int)} method. 10500 * 10501 * @param ch the character to be tested. 10502 * @return {@code true} if the character is a digit; 10503 * {@code false} otherwise. 10504 * @see Character#digit(char, int) 10505 * @see Character#forDigit(int, int) 10506 * @see Character#getType(char) 10507 */ 10508 public static boolean isDigit(char ch) { 10509 return isDigit((int)ch); 10510 } 10511 10512 /** 10513 * Determines if the specified character (Unicode code point) is a digit. 10514 * <p> 10515 * A character is a digit if its general category type, provided 10516 * by {@link Character#getType(int) getType(codePoint)}, is 10517 * {@code DECIMAL_DIGIT_NUMBER}. 
10518 * <p> 10519 * Some Unicode character ranges that contain digits: 10520 * <ul> 10521 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10522 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10523 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10524 * Arabic-Indic digits 10525 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10526 * Extended Arabic-Indic digits 10527 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10528 * Devanagari digits 10529 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10530 * Fullwidth digits 10531 * </ul> 10532 * 10533 * Many other character ranges contain digits as well. 10534 * 10535 * @param codePoint the character (Unicode code point) to be tested. 10536 * @return {@code true} if the character is a digit; 10537 * {@code false} otherwise. 10538 * @see Character#forDigit(int, int) 10539 * @see Character#getType(int) 10540 * @since 1.5 10541 */ 10542 public static boolean isDigit(int codePoint) { 10543 return CharacterData.of(codePoint).isDigit(codePoint); 10544 } 10545 10546 /** 10547 * Determines if a character is defined in Unicode. 10548 * <p> 10549 * A character is defined if at least one of the following is true: 10550 * <ul> 10551 * <li>It has an entry in the UnicodeData file. 10552 * <li>It has a value in a range defined by the UnicodeData file. 10553 * </ul> 10554 * 10555 * <p><b>Note:</b> This method cannot handle <a 10556 * href="#supplementary"> supplementary characters</a>. To support 10557 * all Unicode characters, including supplementary characters, use 10558 * the {@link #isDefined(int)} method. 10559 * 10560 * @param ch the character to be tested 10561 * @return {@code true} if the character has a defined meaning 10562 * in Unicode; {@code false} otherwise. 
10563 * @see Character#isDigit(char) 10564 * @see Character#isLetter(char) 10565 * @see Character#isLetterOrDigit(char) 10566 * @see Character#isLowerCase(char) 10567 * @see Character#isTitleCase(char) 10568 * @see Character#isUpperCase(char) 10569 * @since 1.0.2 10570 */ 10571 public static boolean isDefined(char ch) { 10572 return isDefined((int)ch); 10573 } 10574 10575 /** 10576 * Determines if a character (Unicode code point) is defined in Unicode. 10577 * <p> 10578 * A character is defined if at least one of the following is true: 10579 * <ul> 10580 * <li>It has an entry in the UnicodeData file. 10581 * <li>It has a value in a range defined by the UnicodeData file. 10582 * </ul> 10583 * 10584 * @param codePoint the character (Unicode code point) to be tested. 10585 * @return {@code true} if the character has a defined meaning 10586 * in Unicode; {@code false} otherwise. 10587 * @see Character#isDigit(int) 10588 * @see Character#isLetter(int) 10589 * @see Character#isLetterOrDigit(int) 10590 * @see Character#isLowerCase(int) 10591 * @see Character#isTitleCase(int) 10592 * @see Character#isUpperCase(int) 10593 * @since 1.5 10594 */ 10595 public static boolean isDefined(int codePoint) { 10596 return getType(codePoint) != Character.UNASSIGNED; 10597 } 10598 10599 /** 10600 * Determines if the specified character is a letter. 10601 * <p> 10602 * A character is considered to be a letter if its general 10603 * category type, provided by {@code Character.getType(ch)}, 10604 * is any of the following: 10605 * <ul> 10606 * <li> {@code UPPERCASE_LETTER} 10607 * <li> {@code LOWERCASE_LETTER} 10608 * <li> {@code TITLECASE_LETTER} 10609 * <li> {@code MODIFIER_LETTER} 10610 * <li> {@code OTHER_LETTER} 10611 * </ul> 10612 * 10613 * Not all letters have case. Many characters are 10614 * letters but are neither uppercase nor lowercase nor titlecase. 10615 * 10616 * <p><b>Note:</b> This method cannot handle <a 10617 * href="#supplementary"> supplementary characters</a>. 
To support 10618 * all Unicode characters, including supplementary characters, use 10619 * the {@link #isLetter(int)} method. 10620 * 10621 * @param ch the character to be tested. 10622 * @return {@code true} if the character is a letter; 10623 * {@code false} otherwise. 10624 * @see Character#isDigit(char) 10625 * @see Character#isJavaIdentifierStart(char) 10626 * @see Character#isJavaLetter(char) 10627 * @see Character#isJavaLetterOrDigit(char) 10628 * @see Character#isLetterOrDigit(char) 10629 * @see Character#isLowerCase(char) 10630 * @see Character#isTitleCase(char) 10631 * @see Character#isUnicodeIdentifierStart(char) 10632 * @see Character#isUpperCase(char) 10633 */ 10634 public static boolean isLetter(char ch) { 10635 return isLetter((int)ch); 10636 } 10637 10638 /** 10639 * Determines if the specified character (Unicode code point) is a letter. 10640 * <p> 10641 * A character is considered to be a letter if its general 10642 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10643 * is any of the following: 10644 * <ul> 10645 * <li> {@code UPPERCASE_LETTER} 10646 * <li> {@code LOWERCASE_LETTER} 10647 * <li> {@code TITLECASE_LETTER} 10648 * <li> {@code MODIFIER_LETTER} 10649 * <li> {@code OTHER_LETTER} 10650 * </ul> 10651 * 10652 * Not all letters have case. Many characters are 10653 * letters but are neither uppercase nor lowercase nor titlecase. 10654 * 10655 * @param codePoint the character (Unicode code point) to be tested. 10656 * @return {@code true} if the character is a letter; 10657 * {@code false} otherwise. 
10658 * @see Character#isDigit(int) 10659 * @see Character#isJavaIdentifierStart(int) 10660 * @see Character#isLetterOrDigit(int) 10661 * @see Character#isLowerCase(int) 10662 * @see Character#isTitleCase(int) 10663 * @see Character#isUnicodeIdentifierStart(int) 10664 * @see Character#isUpperCase(int) 10665 * @since 1.5 10666 */ 10667 public static boolean isLetter(int codePoint) { 10668 return ((((1 << Character.UPPERCASE_LETTER) | 10669 (1 << Character.LOWERCASE_LETTER) | 10670 (1 << Character.TITLECASE_LETTER) | 10671 (1 << Character.MODIFIER_LETTER) | 10672 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 10673 != 0; 10674 } 10675 10676 /** 10677 * Determines if the specified character is a letter or digit. 10678 * <p> 10679 * A character is considered to be a letter or digit if either 10680 * {@code Character.isLetter(char ch)} or 10681 * {@code Character.isDigit(char ch)} returns 10682 * {@code true} for the character. 10683 * 10684 * <p><b>Note:</b> This method cannot handle <a 10685 * href="#supplementary"> supplementary characters</a>. To support 10686 * all Unicode characters, including supplementary characters, use 10687 * the {@link #isLetterOrDigit(int)} method. 10688 * 10689 * @param ch the character to be tested. 10690 * @return {@code true} if the character is a letter or digit; 10691 * {@code false} otherwise. 10692 * @see Character#isDigit(char) 10693 * @see Character#isJavaIdentifierPart(char) 10694 * @see Character#isJavaLetter(char) 10695 * @see Character#isJavaLetterOrDigit(char) 10696 * @see Character#isLetter(char) 10697 * @see Character#isUnicodeIdentifierPart(char) 10698 * @since 1.0.2 10699 */ 10700 public static boolean isLetterOrDigit(char ch) { 10701 return isLetterOrDigit((int)ch); 10702 } 10703 10704 /** 10705 * Determines if the specified character (Unicode code point) is a letter or digit. 
10706 * <p> 10707 * A character is considered to be a letter or digit if either 10708 * {@link #isLetter(int) isLetter(codePoint)} or 10709 * {@link #isDigit(int) isDigit(codePoint)} returns 10710 * {@code true} for the character. 10711 * 10712 * @param codePoint the character (Unicode code point) to be tested. 10713 * @return {@code true} if the character is a letter or digit; 10714 * {@code false} otherwise. 10715 * @see Character#isDigit(int) 10716 * @see Character#isJavaIdentifierPart(int) 10717 * @see Character#isLetter(int) 10718 * @see Character#isUnicodeIdentifierPart(int) 10719 * @since 1.5 10720 */ 10721 public static boolean isLetterOrDigit(int codePoint) { 10722 return ((((1 << Character.UPPERCASE_LETTER) | 10723 (1 << Character.LOWERCASE_LETTER) | 10724 (1 << Character.TITLECASE_LETTER) | 10725 (1 << Character.MODIFIER_LETTER) | 10726 (1 << Character.OTHER_LETTER) | 10727 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 10728 != 0; 10729 } 10730 10731 /** 10732 * Determines if the specified character is permissible as the first 10733 * character in a Java identifier. 10734 * <p> 10735 * A character may start a Java identifier if and only if 10736 * one of the following conditions is true: 10737 * <ul> 10738 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10739 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10740 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10741 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10742 * </ul> 10743 * 10744 * @param ch the character to be tested. 10745 * @return {@code true} if the character may start a Java 10746 * identifier; {@code false} otherwise. 
10747 * @see Character#isJavaLetterOrDigit(char) 10748 * @see Character#isJavaIdentifierStart(char) 10749 * @see Character#isJavaIdentifierPart(char) 10750 * @see Character#isLetter(char) 10751 * @see Character#isLetterOrDigit(char) 10752 * @see Character#isUnicodeIdentifierStart(char) 10753 * @since 1.0.2 10754 * @deprecated Replaced by isJavaIdentifierStart(char). 10755 */ 10756 @Deprecated(since="1.1") 10757 public static boolean isJavaLetter(char ch) { 10758 return isJavaIdentifierStart(ch); 10759 } 10760 10761 /** 10762 * Determines if the specified character may be part of a Java 10763 * identifier as other than the first character. 10764 * <p> 10765 * A character may be part of a Java identifier if and only if one 10766 * of the following conditions is true: 10767 * <ul> 10768 * <li> it is a letter 10769 * <li> it is a currency symbol (such as {@code '$'}) 10770 * <li> it is a connecting punctuation character (such as {@code '_'}) 10771 * <li> it is a digit 10772 * <li> it is a numeric letter (such as a Roman numeral character) 10773 * <li> it is a combining mark 10774 * <li> it is a non-spacing mark 10775 * <li> {@code isIdentifierIgnorable} returns 10776 * {@code true} for the character. 10777 * </ul> 10778 * 10779 * @param ch the character to be tested. 10780 * @return {@code true} if the character may be part of a 10781 * Java identifier; {@code false} otherwise. 10782 * @see Character#isJavaLetter(char) 10783 * @see Character#isJavaIdentifierStart(char) 10784 * @see Character#isJavaIdentifierPart(char) 10785 * @see Character#isLetter(char) 10786 * @see Character#isLetterOrDigit(char) 10787 * @see Character#isUnicodeIdentifierPart(char) 10788 * @see Character#isIdentifierIgnorable(char) 10789 * @since 1.0.2 10790 * @deprecated Replaced by isJavaIdentifierPart(char). 
10791 */ 10792 @Deprecated(since="1.1") 10793 public static boolean isJavaLetterOrDigit(char ch) { 10794 return isJavaIdentifierPart(ch); 10795 } 10796 10797 /** 10798 * Determines if the specified character (Unicode code point) is alphabetic. 10799 * <p> 10800 * A character is considered to be alphabetic if its general category type, 10801 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 10802 * the following: 10803 * <ul> 10804 * <li> {@code UPPERCASE_LETTER} 10805 * <li> {@code LOWERCASE_LETTER} 10806 * <li> {@code TITLECASE_LETTER} 10807 * <li> {@code MODIFIER_LETTER} 10808 * <li> {@code OTHER_LETTER} 10809 * <li> {@code LETTER_NUMBER} 10810 * </ul> 10811 * or it has contributory property Other_Alphabetic as defined by the 10812 * Unicode Standard. 10813 * 10814 * @param codePoint the character (Unicode code point) to be tested. 10815 * @return {@code true} if the character is a Unicode alphabet 10816 * character, {@code false} otherwise. 10817 * @since 1.7 10818 */ 10819 public static boolean isAlphabetic(int codePoint) { 10820 return (((((1 << Character.UPPERCASE_LETTER) | 10821 (1 << Character.LOWERCASE_LETTER) | 10822 (1 << Character.TITLECASE_LETTER) | 10823 (1 << Character.MODIFIER_LETTER) | 10824 (1 << Character.OTHER_LETTER) | 10825 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 10826 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 10827 } 10828 10829 /** 10830 * Determines if the specified character (Unicode code point) is a CJKV 10831 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 10832 * the Unicode Standard. 10833 * 10834 * @param codePoint the character (Unicode code point) to be tested. 10835 * @return {@code true} if the character is a Unicode ideograph 10836 * character, {@code false} otherwise. 
10837 * @since 1.7 10838 */ 10839 public static boolean isIdeographic(int codePoint) { 10840 return CharacterData.of(codePoint).isIdeographic(codePoint); 10841 } 10842 10843 /** 10844 * Determines if the specified character is 10845 * permissible as the first character in a Java identifier. 10846 * <p> 10847 * A character may start a Java identifier if and only if 10848 * one of the following conditions is true: 10849 * <ul> 10850 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10851 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10852 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10853 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10854 * </ul> 10855 * 10856 * <p><b>Note:</b> This method cannot handle <a 10857 * href="#supplementary"> supplementary characters</a>. To support 10858 * all Unicode characters, including supplementary characters, use 10859 * the {@link #isJavaIdentifierStart(int)} method. 10860 * 10861 * @param ch the character to be tested. 10862 * @return {@code true} if the character may start a Java identifier; 10863 * {@code false} otherwise. 10864 * @see Character#isJavaIdentifierPart(char) 10865 * @see Character#isLetter(char) 10866 * @see Character#isUnicodeIdentifierStart(char) 10867 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10868 * @since 1.1 10869 */ 10870 @SuppressWarnings("doclint:reference") // cross-module links 10871 public static boolean isJavaIdentifierStart(char ch) { 10872 return isJavaIdentifierStart((int)ch); 10873 } 10874 10875 /** 10876 * Determines if the character (Unicode code point) is 10877 * permissible as the first character in a Java identifier. 
10878 * <p> 10879 * A character may start a Java identifier if and only if 10880 * one of the following conditions is true: 10881 * <ul> 10882 * <li> {@link #isLetter(int) isLetter(codePoint)} 10883 * returns {@code true} 10884 * <li> {@link #getType(int) getType(codePoint)} 10885 * returns {@code LETTER_NUMBER} 10886 * <li> the referenced character is a currency symbol (such as {@code '$'}) 10887 * <li> the referenced character is a connecting punctuation character 10888 * (such as {@code '_'}). 10889 * </ul> 10890 * 10891 * @param codePoint the character (Unicode code point) to be tested. 10892 * @return {@code true} if the character may start a Java identifier; 10893 * {@code false} otherwise. 10894 * @see Character#isJavaIdentifierPart(int) 10895 * @see Character#isLetter(int) 10896 * @see Character#isUnicodeIdentifierStart(int) 10897 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10898 * @since 1.5 10899 */ 10900 @SuppressWarnings("doclint:reference") // cross-module links 10901 public static boolean isJavaIdentifierStart(int codePoint) { 10902 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 10903 } 10904 10905 /** 10906 * Determines if the specified character may be part of a Java 10907 * identifier as other than the first character. 
10908 * <p> 10909 * A character may be part of a Java identifier if any of the following 10910 * conditions are true: 10911 * <ul> 10912 * <li> it is a letter 10913 * <li> it is a currency symbol (such as {@code '$'}) 10914 * <li> it is a connecting punctuation character (such as {@code '_'}) 10915 * <li> it is a digit 10916 * <li> it is a numeric letter (such as a Roman numeral character) 10917 * <li> it is a combining mark 10918 * <li> it is a non-spacing mark 10919 * <li> {@code isIdentifierIgnorable} returns 10920 * {@code true} for the character 10921 * </ul> 10922 * 10923 * <p><b>Note:</b> This method cannot handle <a 10924 * href="#supplementary"> supplementary characters</a>. To support 10925 * all Unicode characters, including supplementary characters, use 10926 * the {@link #isJavaIdentifierPart(int)} method. 10927 * 10928 * @param ch the character to be tested. 10929 * @return {@code true} if the character may be part of a 10930 * Java identifier; {@code false} otherwise. 10931 * @see Character#isIdentifierIgnorable(char) 10932 * @see Character#isJavaIdentifierStart(char) 10933 * @see Character#isLetterOrDigit(char) 10934 * @see Character#isUnicodeIdentifierPart(char) 10935 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10936 * @since 1.1 10937 */ 10938 @SuppressWarnings("doclint:reference") // cross-module links 10939 public static boolean isJavaIdentifierPart(char ch) { 10940 return isJavaIdentifierPart((int)ch); 10941 } 10942 10943 /** 10944 * Determines if the character (Unicode code point) may be part of a Java 10945 * identifier as other than the first character. 
10946 * <p> 10947 * A character may be part of a Java identifier if any of the following 10948 * conditions are true: 10949 * <ul> 10950 * <li> it is a letter 10951 * <li> it is a currency symbol (such as {@code '$'}) 10952 * <li> it is a connecting punctuation character (such as {@code '_'}) 10953 * <li> it is a digit 10954 * <li> it is a numeric letter (such as a Roman numeral character) 10955 * <li> it is a combining mark 10956 * <li> it is a non-spacing mark 10957 * <li> {@link #isIdentifierIgnorable(int) 10958 * isIdentifierIgnorable(codePoint)} returns {@code true} for 10959 * the code point 10960 * </ul> 10961 * 10962 * @param codePoint the character (Unicode code point) to be tested. 10963 * @return {@code true} if the character may be part of a 10964 * Java identifier; {@code false} otherwise. 10965 * @see Character#isIdentifierIgnorable(int) 10966 * @see Character#isJavaIdentifierStart(int) 10967 * @see Character#isLetterOrDigit(int) 10968 * @see Character#isUnicodeIdentifierPart(int) 10969 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10970 * @since 1.5 10971 */ 10972 @SuppressWarnings("doclint:reference") // cross-module links 10973 public static boolean isJavaIdentifierPart(int codePoint) { 10974 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 10975 } 10976 10977 /** 10978 * Determines if the specified character is permissible as the 10979 * first character in a Unicode identifier. 10980 * <p> 10981 * A character may start a Unicode identifier if and only if 10982 * one of the following conditions is true: 10983 * <ul> 10984 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10985 * <li> {@link #getType(char) getType(ch)} returns 10986 * {@code LETTER_NUMBER}. 10987 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10988 * {@code Other_ID_Start}</a> character. 
10989 * </ul> 10990 * <p> 10991 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10992 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10993 * with the following profile of UAX31: 10994 * <pre> 10995 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10996 * </pre> 10997 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10998 * compatibility. 10999 * 11000 * <p><b>Note:</b> This method cannot handle <a 11001 * href="#supplementary"> supplementary characters</a>. To support 11002 * all Unicode characters, including supplementary characters, use 11003 * the {@link #isUnicodeIdentifierStart(int)} method. 11004 * 11005 * @param ch the character to be tested. 11006 * @return {@code true} if the character may start a Unicode 11007 * identifier; {@code false} otherwise. 11008 * 11009 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 11010 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 11011 * @see Character#isJavaIdentifierStart(char) 11012 * @see Character#isLetter(char) 11013 * @see Character#isUnicodeIdentifierPart(char) 11014 * @since 1.1 11015 */ 11016 public static boolean isUnicodeIdentifierStart(char ch) { 11017 return isUnicodeIdentifierStart((int)ch); 11018 } 11019 11020 /** 11021 * Determines if the specified character (Unicode code point) is permissible as the 11022 * first character in a Unicode identifier. 11023 * <p> 11024 * A character may start a Unicode identifier if and only if 11025 * one of the following conditions is true: 11026 * <ul> 11027 * <li> {@link #isLetter(int) isLetter(codePoint)} 11028 * returns {@code true} 11029 * <li> {@link #getType(int) getType(codePoint)} 11030 * returns {@code LETTER_NUMBER}. 11031 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 11032 * {@code Other_ID_Start}</a> character. 
11033 * </ul> 11034 * <p> 11035 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 11036 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 11037 * with the following profile of UAX31: 11038 * <pre> 11039 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 11040 * </pre> 11041 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 11042 * compatibility. 11043 * 11044 * @param codePoint the character (Unicode code point) to be tested. 11045 * @return {@code true} if the character may start a Unicode 11046 * identifier; {@code false} otherwise. 11047 * 11048 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 11049 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 11050 * @see Character#isJavaIdentifierStart(int) 11051 * @see Character#isLetter(int) 11052 * @see Character#isUnicodeIdentifierPart(int) 11053 * @since 1.5 11054 */ 11055 public static boolean isUnicodeIdentifierStart(int codePoint) { 11056 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 11057 } 11058 11059 /** 11060 * Determines if the specified character may be part of a Unicode 11061 * identifier as other than the first character. 11062 * <p> 11063 * A character may be part of a Unicode identifier if and only if 11064 * one of the following statements is true: 11065 * <ul> 11066 * <li> it is a letter 11067 * <li> it is a connecting punctuation character (such as {@code '_'}) 11068 * <li> it is a digit 11069 * <li> it is a numeric letter (such as a Roman numeral character) 11070 * <li> it is a combining mark 11071 * <li> it is a non-spacing mark 11072 * <li> {@code isIdentifierIgnorable} returns 11073 * {@code true} for this character. 11074 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 11075 * {@code Other_ID_Start}</a> character. 
11076 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 11077 * {@code Other_ID_Continue}</a> character. 11078 * </ul> 11079 * <p> 11080 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 11081 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 11082 * with the following profile of UAX31: 11083 * <pre> 11084 * Continue := Start + ID_Continue + ignorable 11085 * Medial := empty 11086 * ignorable := isIdentifierIgnorable(char) returns true for the character 11087 * </pre> 11088 * {@code ignorable} is added to {@code Continue} for backward 11089 * compatibility. 11090 * 11091 * <p><b>Note:</b> This method cannot handle <a 11092 * href="#supplementary"> supplementary characters</a>. To support 11093 * all Unicode characters, including supplementary characters, use 11094 * the {@link #isUnicodeIdentifierPart(int)} method. 11095 * 11096 * @param ch the character to be tested. 11097 * @return {@code true} if the character may be part of a 11098 * Unicode identifier; {@code false} otherwise. 11099 * 11100 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 11101 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 11102 * @see Character#isIdentifierIgnorable(char) 11103 * @see Character#isJavaIdentifierPart(char) 11104 * @see Character#isLetterOrDigit(char) 11105 * @see Character#isUnicodeIdentifierStart(char) 11106 * @since 1.1 11107 */ 11108 public static boolean isUnicodeIdentifierPart(char ch) { 11109 return isUnicodeIdentifierPart((int)ch); 11110 } 11111 11112 /** 11113 * Determines if the specified character (Unicode code point) may be part of a Unicode 11114 * identifier as other than the first character. 
11115 * <p> 11116 * A character may be part of a Unicode identifier if and only if 11117 * one of the following statements is true: 11118 * <ul> 11119 * <li> it is a letter 11120 * <li> it is a connecting punctuation character (such as {@code '_'}) 11121 * <li> it is a digit 11122 * <li> it is a numeric letter (such as a Roman numeral character) 11123 * <li> it is a combining mark 11124 * <li> it is a non-spacing mark 11125 * <li> {@code isIdentifierIgnorable} returns 11126 * {@code true} for this character. 11127 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 11128 * {@code Other_ID_Start}</a> character. 11129 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 11130 * {@code Other_ID_Continue}</a> character. 11131 * </ul> 11132 * <p> 11133 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 11134 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 11135 * with the following profile of UAX31: 11136 * <pre> 11137 * Continue := Start + ID_Continue + ignorable 11138 * Medial := empty 11139 * ignorable := isIdentifierIgnorable(int) returns true for the character 11140 * </pre> 11141 * {@code ignorable} is added to {@code Continue} for backward 11142 * compatibility. 11143 * 11144 * @param codePoint the character (Unicode code point) to be tested. 11145 * @return {@code true} if the character may be part of a 11146 * Unicode identifier; {@code false} otherwise. 
11147 * 11148 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 11149 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 11150 * @see Character#isIdentifierIgnorable(int) 11151 * @see Character#isJavaIdentifierPart(int) 11152 * @see Character#isLetterOrDigit(int) 11153 * @see Character#isUnicodeIdentifierStart(int) 11154 * @since 1.5 11155 */ 11156 public static boolean isUnicodeIdentifierPart(int codePoint) { 11157 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 11158 } 11159 11160 /** 11161 * Determines if the specified character should be regarded as 11162 * an ignorable character in a Java identifier or a Unicode identifier. 11163 * <p> 11164 * The following Unicode characters are ignorable in a Java identifier 11165 * or a Unicode identifier: 11166 * <ul> 11167 * <li>ISO control characters that are not whitespace 11168 * <ul> 11169 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 11170 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 11171 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 11172 * </ul> 11173 * 11174 * <li>all characters that have the {@code FORMAT} general 11175 * category value 11176 * </ul> 11177 * 11178 * <p><b>Note:</b> This method cannot handle <a 11179 * href="#supplementary"> supplementary characters</a>. To support 11180 * all Unicode characters, including supplementary characters, use 11181 * the {@link #isIdentifierIgnorable(int)} method. 11182 * 11183 * @param ch the character to be tested. 11184 * @return {@code true} if the character is an ignorable control 11185 * character that may be part of a Java or Unicode identifier; 11186 * {@code false} otherwise. 
11187 * @see Character#isJavaIdentifierPart(char) 11188 * @see Character#isUnicodeIdentifierPart(char) 11189 * @since 1.1 11190 */ 11191 public static boolean isIdentifierIgnorable(char ch) { 11192 return isIdentifierIgnorable((int)ch); 11193 } 11194 11195 /** 11196 * Determines if the specified character (Unicode code point) should be regarded as 11197 * an ignorable character in a Java identifier or a Unicode identifier. 11198 * <p> 11199 * The following Unicode characters are ignorable in a Java identifier 11200 * or a Unicode identifier: 11201 * <ul> 11202 * <li>ISO control characters that are not whitespace 11203 * <ul> 11204 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 11205 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 11206 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 11207 * </ul> 11208 * 11209 * <li>all characters that have the {@code FORMAT} general 11210 * category value 11211 * </ul> 11212 * 11213 * @param codePoint the character (Unicode code point) to be tested. 11214 * @return {@code true} if the character is an ignorable control 11215 * character that may be part of a Java or Unicode identifier; 11216 * {@code false} otherwise. 11217 * @see Character#isJavaIdentifierPart(int) 11218 * @see Character#isUnicodeIdentifierPart(int) 11219 * @since 1.5 11220 */ 11221 public static boolean isIdentifierIgnorable(int codePoint) { 11222 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 11223 } 11224 11225 /** 11226 * Determines if the specified character (Unicode code point) is an Emoji. 11227 * <p> 11228 * A character is considered to be an Emoji if and only if it has the {@code Emoji} 11229 * property, defined in 11230 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11231 * Unicode Emoji (Technical Standard #51)</a>. 11232 * 11233 * @param codePoint the character (Unicode code point) to be tested. 
11234 * @return {@code true} if the character is an Emoji; 11235 * {@code false} otherwise. 11236 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11237 * @since 21 11238 */ 11239 public static boolean isEmoji(int codePoint) { 11240 return CharacterData.of(codePoint).isEmoji(codePoint); 11241 } 11242 11243 /** 11244 * Determines if the specified character (Unicode code point) has the 11245 * Emoji Presentation property by default. 11246 * <p> 11247 * A character is considered to have the Emoji Presentation property if and 11248 * only if it has the {@code Emoji_Presentation} property, defined in 11249 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11250 * Unicode Emoji (Technical Standard #51)</a>. 11251 * 11252 * @param codePoint the character (Unicode code point) to be tested. 11253 * @return {@code true} if the character has the Emoji Presentation 11254 * property; {@code false} otherwise. 11255 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11256 * @since 21 11257 */ 11258 public static boolean isEmojiPresentation(int codePoint) { 11259 return CharacterData.of(codePoint).isEmojiPresentation(codePoint); 11260 } 11261 11262 /** 11263 * Determines if the specified character (Unicode code point) is an 11264 * Emoji Modifier. 11265 * <p> 11266 * A character is considered to be an Emoji Modifier if and only if it has 11267 * the {@code Emoji_Modifier} property, defined in 11268 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11269 * Unicode Emoji (Technical Standard #51)</a>. 11270 * 11271 * @param codePoint the character (Unicode code point) to be tested. 11272 * @return {@code true} if the character is an Emoji Modifier; 11273 * {@code false} otherwise. 
11274 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11275 * @since 21 11276 */ 11277 public static boolean isEmojiModifier(int codePoint) { 11278 return CharacterData.of(codePoint).isEmojiModifier(codePoint); 11279 } 11280 11281 /** 11282 * Determines if the specified character (Unicode code point) is an 11283 * Emoji Modifier Base. 11284 * <p> 11285 * A character is considered to be an Emoji Modifier Base if and only if it has 11286 * the {@code Emoji_Modifier_Base} property, defined in 11287 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11288 * Unicode Emoji (Technical Standard #51)</a>. 11289 * 11290 * @param codePoint the character (Unicode code point) to be tested. 11291 * @return {@code true} if the character is an Emoji Modifier Base; 11292 * {@code false} otherwise. 11293 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11294 * @since 21 11295 */ 11296 public static boolean isEmojiModifierBase(int codePoint) { 11297 return CharacterData.of(codePoint).isEmojiModifierBase(codePoint); 11298 } 11299 11300 /** 11301 * Determines if the specified character (Unicode code point) is an 11302 * Emoji Component. 11303 * <p> 11304 * A character is considered to be an Emoji Component if and only if it has 11305 * the {@code Emoji_Component} property, defined in 11306 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11307 * Unicode Emoji (Technical Standard #51)</a>. 11308 * 11309 * @param codePoint the character (Unicode code point) to be tested. 11310 * @return {@code true} if the character is an Emoji Component; 11311 * {@code false} otherwise. 11312 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11313 * @since 21 11314 */ 11315 public static boolean isEmojiComponent(int codePoint) { 11316 return CharacterData.of(codePoint).isEmojiComponent(codePoint); 11317 } 11318 11319 /** 11320 * Determines if the specified character (Unicode code point) is 11321 * an Extended Pictographic. 
11322 * <p> 11323 * A character is considered to be an Extended Pictographic if and only if it has 11324 * the {@code Extended_Pictographic} property, defined in 11325 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11326 * Unicode Emoji (Technical Standard #51)</a>. 11327 * 11328 * @param codePoint the character (Unicode code point) to be tested. 11329 * @return {@code true} if the character is an Extended Pictographic; 11330 * {@code false} otherwise. 11331 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11332 * @since 21 11333 */ 11334 public static boolean isExtendedPictographic(int codePoint) { 11335 return CharacterData.of(codePoint).isExtendedPictographic(codePoint); 11336 } 11337 11338 /** 11339 * Converts the character argument to lowercase using case 11340 * mapping information from the UnicodeData file. 11341 * <p> 11342 * Note that 11343 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 11344 * does not always return {@code true} for some ranges of 11345 * characters, particularly those that are symbols or ideographs. 11346 * 11347 * <p>In general, {@link String#toLowerCase()} should be used to map 11348 * characters to lowercase. {@code String} case mapping methods 11349 * have several benefits over {@code Character} case mapping methods. 11350 * {@code String} case mapping methods can perform locale-sensitive 11351 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11352 * the {@code Character} case mapping methods cannot. 11353 * 11354 * <p><b>Note:</b> This method cannot handle <a 11355 * href="#supplementary"> supplementary characters</a>. To support 11356 * all Unicode characters, including supplementary characters, use 11357 * the {@link #toLowerCase(int)} method. 11358 * 11359 * @param ch the character to be converted. 11360 * @return the lowercase equivalent of the character, if any; 11361 * otherwise, the character itself. 
11362 * @see Character#isLowerCase(char) 11363 * @see String#toLowerCase() 11364 */ 11365 public static char toLowerCase(char ch) { 11366 return (char)toLowerCase((int)ch); 11367 } 11368 11369 /** 11370 * Converts the character (Unicode code point) argument to 11371 * lowercase using case mapping information from the UnicodeData 11372 * file. 11373 * 11374 * <p> Note that 11375 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 11376 * does not always return {@code true} for some ranges of 11377 * characters, particularly those that are symbols or ideographs. 11378 * 11379 * <p>In general, {@link String#toLowerCase()} should be used to map 11380 * characters to lowercase. {@code String} case mapping methods 11381 * have several benefits over {@code Character} case mapping methods. 11382 * {@code String} case mapping methods can perform locale-sensitive 11383 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11384 * the {@code Character} case mapping methods cannot. 11385 * 11386 * @param codePoint the character (Unicode code point) to be converted. 11387 * @return the lowercase equivalent of the character (Unicode code 11388 * point), if any; otherwise, the character itself. 11389 * @see Character#isLowerCase(int) 11390 * @see String#toLowerCase() 11391 * 11392 * @since 1.5 11393 */ 11394 public static int toLowerCase(int codePoint) { 11395 return CharacterData.of(codePoint).toLowerCase(codePoint); 11396 } 11397 11398 /** 11399 * Converts the character argument to uppercase using case mapping 11400 * information from the UnicodeData file. 11401 * <p> 11402 * Note that 11403 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 11404 * does not always return {@code true} for some ranges of 11405 * characters, particularly those that are symbols or ideographs. 11406 * 11407 * <p>In general, {@link String#toUpperCase()} should be used to map 11408 * characters to uppercase. 
{@code String} case mapping methods 11409 * have several benefits over {@code Character} case mapping methods. 11410 * {@code String} case mapping methods can perform locale-sensitive 11411 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11412 * the {@code Character} case mapping methods cannot. 11413 * 11414 * <p><b>Note:</b> This method cannot handle <a 11415 * href="#supplementary"> supplementary characters</a>. To support 11416 * all Unicode characters, including supplementary characters, use 11417 * the {@link #toUpperCase(int)} method. 11418 * 11419 * @param ch the character to be converted. 11420 * @return the uppercase equivalent of the character, if any; 11421 * otherwise, the character itself. 11422 * @see Character#isUpperCase(char) 11423 * @see String#toUpperCase() 11424 */ 11425 public static char toUpperCase(char ch) { 11426 return (char)toUpperCase((int)ch); 11427 } 11428 11429 /** 11430 * Converts the character (Unicode code point) argument to 11431 * uppercase using case mapping information from the UnicodeData 11432 * file. 11433 * 11434 * <p>Note that 11435 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 11436 * does not always return {@code true} for some ranges of 11437 * characters, particularly those that are symbols or ideographs. 11438 * 11439 * <p>In general, {@link String#toUpperCase()} should be used to map 11440 * characters to uppercase. {@code String} case mapping methods 11441 * have several benefits over {@code Character} case mapping methods. 11442 * {@code String} case mapping methods can perform locale-sensitive 11443 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11444 * the {@code Character} case mapping methods cannot. 11445 * 11446 * @param codePoint the character (Unicode code point) to be converted. 11447 * @return the uppercase equivalent of the character, if any; 11448 * otherwise, the character itself. 
11449 * @see Character#isUpperCase(int) 11450 * @see String#toUpperCase() 11451 * 11452 * @since 1.5 11453 */ 11454 public static int toUpperCase(int codePoint) { 11455 return CharacterData.of(codePoint).toUpperCase(codePoint); 11456 } 11457 11458 /** 11459 * Converts the character argument to titlecase using case mapping 11460 * information from the UnicodeData file. If a character has no 11461 * explicit titlecase mapping and is not itself a titlecase char 11462 * according to UnicodeData, then the uppercase mapping is 11463 * returned as an equivalent titlecase mapping. If the 11464 * {@code char} argument is already a titlecase 11465 * {@code char}, the same {@code char} value will be 11466 * returned. 11467 * <p> 11468 * Note that 11469 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 11470 * does not always return {@code true} for some ranges of 11471 * characters. 11472 * 11473 * <p><b>Note:</b> This method cannot handle <a 11474 * href="#supplementary"> supplementary characters</a>. To support 11475 * all Unicode characters, including supplementary characters, use 11476 * the {@link #toTitleCase(int)} method. 11477 * 11478 * @param ch the character to be converted. 11479 * @return the titlecase equivalent of the character, if any; 11480 * otherwise, the character itself. 11481 * @see Character#isTitleCase(char) 11482 * @see Character#toLowerCase(char) 11483 * @see Character#toUpperCase(char) 11484 * @since 1.0.2 11485 */ 11486 public static char toTitleCase(char ch) { 11487 return (char)toTitleCase((int)ch); 11488 } 11489 11490 /** 11491 * Converts the character (Unicode code point) argument to titlecase using case mapping 11492 * information from the UnicodeData file. If a character has no 11493 * explicit titlecase mapping and is not itself a titlecase char 11494 * according to UnicodeData, then the uppercase mapping is 11495 * returned as an equivalent titlecase mapping. 
If the 11496 * character argument is already a titlecase 11497 * character, the same character value will be 11498 * returned. 11499 * 11500 * <p>Note that 11501 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 11502 * does not always return {@code true} for some ranges of 11503 * characters. 11504 * 11505 * @param codePoint the character (Unicode code point) to be converted. 11506 * @return the titlecase equivalent of the character, if any; 11507 * otherwise, the character itself. 11508 * @see Character#isTitleCase(int) 11509 * @see Character#toLowerCase(int) 11510 * @see Character#toUpperCase(int) 11511 * @since 1.5 11512 */ 11513 public static int toTitleCase(int codePoint) { 11514 return CharacterData.of(codePoint).toTitleCase(codePoint); 11515 } 11516 11517 /** 11518 * Returns the numeric value of the character {@code ch} in the 11519 * specified radix. 11520 * <p> 11521 * If the radix is not in the range {@code MIN_RADIX} ≤ 11522 * {@code radix} ≤ {@code MAX_RADIX} or if the 11523 * value of {@code ch} is not a valid digit in the specified 11524 * radix, {@code -1} is returned. A character is a valid digit 11525 * if at least one of the following is true: 11526 * <ul> 11527 * <li>The method {@code isDigit} is {@code true} of the character 11528 * and the Unicode decimal digit value of the character (or its 11529 * single-character decomposition) is less than the specified radix. 11530 * In this case the decimal digit value is returned. 11531 * <li>The character is one of the uppercase Latin letters 11532 * {@code 'A'} through {@code 'Z'} and its code is less than 11533 * {@code radix + 'A' - 10}. 11534 * In this case, {@code ch - 'A' + 10} 11535 * is returned. 11536 * <li>The character is one of the lowercase Latin letters 11537 * {@code 'a'} through {@code 'z'} and its code is less than 11538 * {@code radix + 'a' - 10}. 11539 * In this case, {@code ch - 'a' + 10} 11540 * is returned. 
11541 * <li>The character is one of the fullwidth uppercase Latin letters A 11542 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11543 * and its code is less than 11544 * {@code radix + '\u005CuFF21' - 10}. 11545 * In this case, {@code ch - '\u005CuFF21' + 10} 11546 * is returned. 11547 * <li>The character is one of the fullwidth lowercase Latin letters a 11548 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11549 * and its code is less than 11550 * {@code radix + '\u005CuFF41' - 10}. 11551 * In this case, {@code ch - '\u005CuFF41' + 10} 11552 * is returned. 11553 * </ul> 11554 * 11555 * <p><b>Note:</b> This method cannot handle <a 11556 * href="#supplementary"> supplementary characters</a>. To support 11557 * all Unicode characters, including supplementary characters, use 11558 * the {@link #digit(int, int)} method. 11559 * 11560 * @param ch the character to be converted. 11561 * @param radix the radix. 11562 * @return the numeric value represented by the character in the 11563 * specified radix. 11564 * @see Character#forDigit(int, int) 11565 * @see Character#isDigit(char) 11566 */ 11567 public static int digit(char ch, int radix) { 11568 return digit((int)ch, radix); 11569 } 11570 11571 /** 11572 * Returns the numeric value of the specified character (Unicode 11573 * code point) in the specified radix. 11574 * 11575 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 11576 * {@code radix} ≤ {@code MAX_RADIX} or if the 11577 * character is not a valid digit in the specified 11578 * radix, {@code -1} is returned. A character is a valid digit 11579 * if at least one of the following is true: 11580 * <ul> 11581 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 11582 * and the Unicode decimal digit value of the character (or its 11583 * single-character decomposition) is less than the specified radix. 11584 * In this case the decimal digit value is returned. 
11585 * <li>The character is one of the uppercase Latin letters 11586 * {@code 'A'} through {@code 'Z'} and its code is less than 11587 * {@code radix + 'A' - 10}. 11588 * In this case, {@code codePoint - 'A' + 10} 11589 * is returned. 11590 * <li>The character is one of the lowercase Latin letters 11591 * {@code 'a'} through {@code 'z'} and its code is less than 11592 * {@code radix + 'a' - 10}. 11593 * In this case, {@code codePoint - 'a' + 10} 11594 * is returned. 11595 * <li>The character is one of the fullwidth uppercase Latin letters A 11596 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11597 * and its code is less than 11598 * {@code radix + '\u005CuFF21' - 10}. 11599 * In this case, 11600 * {@code codePoint - '\u005CuFF21' + 10} 11601 * is returned. 11602 * <li>The character is one of the fullwidth lowercase Latin letters a 11603 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11604 * and its code is less than 11605 * {@code radix + '\u005CuFF41'- 10}. 11606 * In this case, 11607 * {@code codePoint - '\u005CuFF41' + 10} 11608 * is returned. 11609 * </ul> 11610 * 11611 * @param codePoint the character (Unicode code point) to be converted. 11612 * @param radix the radix. 11613 * @return the numeric value represented by the character in the 11614 * specified radix. 11615 * @see Character#forDigit(int, int) 11616 * @see Character#isDigit(int) 11617 * @since 1.5 11618 */ 11619 public static int digit(int codePoint, int radix) { 11620 return CharacterData.of(codePoint).digit(codePoint, radix); 11621 } 11622 11623 /** 11624 * Returns the {@code int} value that the specified Unicode 11625 * character represents. For example, the character 11626 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 11627 * an int with a value of 50. 
11628 * <p> 11629 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11630 * {@code '\u005Cu005A'}), lowercase 11631 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11632 * full width variant ({@code '\u005CuFF21'} through 11633 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11634 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11635 * through 35. This is independent of the Unicode specification, 11636 * which does not assign numeric values to these {@code char} 11637 * values. 11638 * <p> 11639 * If the character does not have a numeric value, then -1 is returned. 11640 * If the character has a numeric value that cannot be represented as a 11641 * nonnegative integer (for example, a fractional value), then -2 11642 * is returned. 11643 * 11644 * <p><b>Note:</b> This method cannot handle <a 11645 * href="#supplementary"> supplementary characters</a>. To support 11646 * all Unicode characters, including supplementary characters, use 11647 * the {@link #getNumericValue(int)} method. 11648 * 11649 * @param ch the character to be converted. 11650 * @return the numeric value of the character, as a nonnegative {@code int} 11651 * value; -2 if the character has a numeric value but the value 11652 * can not be represented as a nonnegative {@code int} value; 11653 * -1 if the character has no numeric value. 11654 * @see Character#forDigit(int, int) 11655 * @see Character#isDigit(char) 11656 * @since 1.1 11657 */ 11658 public static int getNumericValue(char ch) { 11659 return getNumericValue((int)ch); 11660 } 11661 11662 /** 11663 * Returns the {@code int} value that the specified 11664 * character (Unicode code point) represents. For example, the character 11665 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 11666 * an {@code int} with a value of 50. 
11667 * <p> 11668 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11669 * {@code '\u005Cu005A'}), lowercase 11670 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11671 * full width variant ({@code '\u005CuFF21'} through 11672 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11673 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11674 * through 35. This is independent of the Unicode specification, 11675 * which does not assign numeric values to these {@code char} 11676 * values. 11677 * <p> 11678 * If the character does not have a numeric value, then -1 is returned. 11679 * If the character has a numeric value that cannot be represented as a 11680 * nonnegative integer (for example, a fractional value), then -2 11681 * is returned. 11682 * 11683 * @param codePoint the character (Unicode code point) to be converted. 11684 * @return the numeric value of the character, as a nonnegative {@code int} 11685 * value; -2 if the character has a numeric value but the value 11686 * can not be represented as a nonnegative {@code int} value; 11687 * -1 if the character has no numeric value. 11688 * @see Character#forDigit(int, int) 11689 * @see Character#isDigit(int) 11690 * @since 1.5 11691 */ 11692 public static int getNumericValue(int codePoint) { 11693 return CharacterData.of(codePoint).getNumericValue(codePoint); 11694 } 11695 11696 /** 11697 * Determines if the specified character is ISO-LATIN-1 white space. 
11698 * This method returns {@code true} for the following five 11699 * characters only: 11700 * <table class="striped"> 11701 * <caption style="display:none">truechars</caption> 11702 * <thead> 11703 * <tr><th scope="col">Character 11704 * <th scope="col">Code 11705 * <th scope="col">Name 11706 * </thead> 11707 * <tbody> 11708 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td> 11709 * <td>{@code HORIZONTAL TABULATION}</td></tr> 11710 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td> 11711 * <td>{@code NEW LINE}</td></tr> 11712 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td> 11713 * <td>{@code FORM FEED}</td></tr> 11714 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td> 11715 * <td>{@code CARRIAGE RETURN}</td></tr> 11716 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td> 11717 * <td>{@code SPACE}</td></tr> 11718 * </tbody> 11719 * </table> 11720 * 11721 * @param ch the character to be tested. 11722 * @return {@code true} if the character is ISO-LATIN-1 white 11723 * space; {@code false} otherwise. 11724 * @see Character#isSpaceChar(char) 11725 * @see Character#isWhitespace(char) 11726 * @deprecated Replaced by isWhitespace(char). 11727 */ 11728 @Deprecated(since="1.1") 11729 public static boolean isSpace(char ch) { 11730 return (ch <= 0x0020) && 11731 (((((1L << 0x0009) | 11732 (1L << 0x000A) | 11733 (1L << 0x000C) | 11734 (1L << 0x000D) | 11735 (1L << 0x0020)) >> ch) & 1L) != 0); 11736 } 11737 11738 11739 /** 11740 * Determines if the specified character is a Unicode space character. 11741 * A character is considered to be a space character if and only if 11742 * it is specified to be a space character by the Unicode Standard. 
This 11743 * method returns true if the character's general category type is any of 11744 * the following: 11745 * <ul> 11746 * <li> {@code SPACE_SEPARATOR} 11747 * <li> {@code LINE_SEPARATOR} 11748 * <li> {@code PARAGRAPH_SEPARATOR} 11749 * </ul> 11750 * 11751 * <p><b>Note:</b> This method cannot handle <a 11752 * href="#supplementary"> supplementary characters</a>. To support 11753 * all Unicode characters, including supplementary characters, use 11754 * the {@link #isSpaceChar(int)} method. 11755 * 11756 * @param ch the character to be tested. 11757 * @return {@code true} if the character is a space character; 11758 * {@code false} otherwise. 11759 * @see Character#isWhitespace(char) 11760 * @since 1.1 11761 */ 11762 public static boolean isSpaceChar(char ch) { 11763 return isSpaceChar((int)ch); 11764 } 11765 11766 /** 11767 * Determines if the specified character (Unicode code point) is a 11768 * Unicode space character. A character is considered to be a 11769 * space character if and only if it is specified to be a space 11770 * character by the Unicode Standard. This method returns true if 11771 * the character's general category type is any of the following: 11772 * 11773 * <ul> 11774 * <li> {@link #SPACE_SEPARATOR} 11775 * <li> {@link #LINE_SEPARATOR} 11776 * <li> {@link #PARAGRAPH_SEPARATOR} 11777 * </ul> 11778 * 11779 * @param codePoint the character (Unicode code point) to be tested. 11780 * @return {@code true} if the character is a space character; 11781 * {@code false} otherwise. 11782 * @see Character#isWhitespace(int) 11783 * @since 1.5 11784 */ 11785 public static boolean isSpaceChar(int codePoint) { 11786 return ((((1 << Character.SPACE_SEPARATOR) | 11787 (1 << Character.LINE_SEPARATOR) | 11788 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 11789 != 0; 11790 } 11791 11792 /** 11793 * Determines if the specified character is white space according to Java. 
11794 * A character is a Java whitespace character if and only if it satisfies 11795 * one of the following criteria: 11796 * <ul> 11797 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 11798 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 11799 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11800 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11801 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11802 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11803 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11804 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11805 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11806 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11807 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11808 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11809 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11810 * </ul> 11811 * 11812 * <p><b>Note:</b> This method cannot handle <a 11813 * href="#supplementary"> supplementary characters</a>. To support 11814 * all Unicode characters, including supplementary characters, use 11815 * the {@link #isWhitespace(int)} method. 11816 * 11817 * @param ch the character to be tested. 11818 * @return {@code true} if the character is a Java whitespace 11819 * character; {@code false} otherwise. 11820 * @see Character#isSpaceChar(char) 11821 * @since 1.1 11822 */ 11823 public static boolean isWhitespace(char ch) { 11824 return isWhitespace((int)ch); 11825 } 11826 11827 /** 11828 * Determines if the specified character (Unicode code point) is 11829 * white space according to Java. 
A character is a Java 11830 * whitespace character if and only if it satisfies one of the 11831 * following criteria: 11832 * <ul> 11833 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 11834 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 11835 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11836 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11837 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11838 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11839 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11840 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11841 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11842 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11843 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11844 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11845 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11846 * </ul> 11847 * 11848 * @param codePoint the character (Unicode code point) to be tested. 11849 * @return {@code true} if the character is a Java whitespace 11850 * character; {@code false} otherwise. 11851 * @see Character#isSpaceChar(int) 11852 * @since 1.5 11853 */ 11854 public static boolean isWhitespace(int codePoint) { 11855 return CharacterData.of(codePoint).isWhitespace(codePoint); 11856 } 11857 11858 /** 11859 * Determines if the specified character is an ISO control 11860 * character. A character is considered to be an ISO control 11861 * character if its code is in the range {@code '\u005Cu0000'} 11862 * through {@code '\u005Cu001F'} or in the range 11863 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11864 * 11865 * <p><b>Note:</b> This method cannot handle <a 11866 * href="#supplementary"> supplementary characters</a>. To support 11867 * all Unicode characters, including supplementary characters, use 11868 * the {@link #isISOControl(int)} method. 
11869 * 11870 * @param ch the character to be tested. 11871 * @return {@code true} if the character is an ISO control character; 11872 * {@code false} otherwise. 11873 * 11874 * @see Character#isSpaceChar(char) 11875 * @see Character#isWhitespace(char) 11876 * @since 1.1 11877 */ 11878 public static boolean isISOControl(char ch) { 11879 return isISOControl((int)ch); 11880 } 11881 11882 /** 11883 * Determines if the referenced character (Unicode code point) is an ISO control 11884 * character. A character is considered to be an ISO control 11885 * character if its code is in the range {@code '\u005Cu0000'} 11886 * through {@code '\u005Cu001F'} or in the range 11887 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11888 * 11889 * @param codePoint the character (Unicode code point) to be tested. 11890 * @return {@code true} if the character is an ISO control character; 11891 * {@code false} otherwise. 11892 * @see Character#isSpaceChar(int) 11893 * @see Character#isWhitespace(int) 11894 * @since 1.5 11895 */ 11896 public static boolean isISOControl(int codePoint) { 11897 // Optimized form of: 11898 // (codePoint >= 0x00 && codePoint <= 0x1F) || 11899 // (codePoint >= 0x7F && codePoint <= 0x9F); 11900 return codePoint <= 0x9F && 11901 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 11902 } 11903 11904 /** 11905 * Returns a value indicating a character's general category. 11906 * 11907 * <p><b>Note:</b> This method cannot handle <a 11908 * href="#supplementary"> supplementary characters</a>. To support 11909 * all Unicode characters, including supplementary characters, use 11910 * the {@link #getType(int)} method. 11911 * 11912 * @param ch the character to be tested. 11913 * @return a value of type {@code int} representing the 11914 * character's general category. 
11915 * @see Character#COMBINING_SPACING_MARK 11916 * @see Character#CONNECTOR_PUNCTUATION 11917 * @see Character#CONTROL 11918 * @see Character#CURRENCY_SYMBOL 11919 * @see Character#DASH_PUNCTUATION 11920 * @see Character#DECIMAL_DIGIT_NUMBER 11921 * @see Character#ENCLOSING_MARK 11922 * @see Character#END_PUNCTUATION 11923 * @see Character#FINAL_QUOTE_PUNCTUATION 11924 * @see Character#FORMAT 11925 * @see Character#INITIAL_QUOTE_PUNCTUATION 11926 * @see Character#LETTER_NUMBER 11927 * @see Character#LINE_SEPARATOR 11928 * @see Character#LOWERCASE_LETTER 11929 * @see Character#MATH_SYMBOL 11930 * @see Character#MODIFIER_LETTER 11931 * @see Character#MODIFIER_SYMBOL 11932 * @see Character#NON_SPACING_MARK 11933 * @see Character#OTHER_LETTER 11934 * @see Character#OTHER_NUMBER 11935 * @see Character#OTHER_PUNCTUATION 11936 * @see Character#OTHER_SYMBOL 11937 * @see Character#PARAGRAPH_SEPARATOR 11938 * @see Character#PRIVATE_USE 11939 * @see Character#SPACE_SEPARATOR 11940 * @see Character#START_PUNCTUATION 11941 * @see Character#SURROGATE 11942 * @see Character#TITLECASE_LETTER 11943 * @see Character#UNASSIGNED 11944 * @see Character#UPPERCASE_LETTER 11945 * @since 1.1 11946 */ 11947 public static int getType(char ch) { 11948 return getType((int)ch); 11949 } 11950 11951 /** 11952 * Returns a value indicating a character's general category. 11953 * 11954 * @param codePoint the character (Unicode code point) to be tested. 11955 * @return a value of type {@code int} representing the 11956 * character's general category. 
11957 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 11958 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 11959 * @see Character#CONTROL CONTROL 11960 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 11961 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 11962 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 11963 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 11964 * @see Character#END_PUNCTUATION END_PUNCTUATION 11965 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 11966 * @see Character#FORMAT FORMAT 11967 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 11968 * @see Character#LETTER_NUMBER LETTER_NUMBER 11969 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 11970 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 11971 * @see Character#MATH_SYMBOL MATH_SYMBOL 11972 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 11973 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 11974 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 11975 * @see Character#OTHER_LETTER OTHER_LETTER 11976 * @see Character#OTHER_NUMBER OTHER_NUMBER 11977 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 11978 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 11979 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 11980 * @see Character#PRIVATE_USE PRIVATE_USE 11981 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 11982 * @see Character#START_PUNCTUATION START_PUNCTUATION 11983 * @see Character#SURROGATE SURROGATE 11984 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 11985 * @see Character#UNASSIGNED UNASSIGNED 11986 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 11987 * @since 1.5 11988 */ 11989 public static int getType(int codePoint) { 11990 return CharacterData.of(codePoint).getType(codePoint); 11991 } 11992 11993 /** 11994 * Determines the character representation for a specific digit in 11995 * the specified radix. 
If the value of {@code radix} is not a 11996 * valid radix, or the value of {@code digit} is not a valid 11997 * digit in the specified radix, the null character 11998 * ({@code '\u005Cu0000'}) is returned. 11999 * <p> 12000 * The {@code radix} argument is valid if it is greater than or 12001 * equal to {@code MIN_RADIX} and less than or equal to 12002 * {@code MAX_RADIX}. The {@code digit} argument is valid if 12003 * {@code 0 <= digit < radix}. 12004 * <p> 12005 * If the digit is less than 10, then 12006 * {@code '0' + digit} is returned. Otherwise, the value 12007 * {@code 'a' + digit - 10} is returned. 12008 * 12009 * @param digit the number to convert to a character. 12010 * @param radix the radix. 12011 * @return the {@code char} representation of the specified digit 12012 * in the specified radix. 12013 * @see Character#MIN_RADIX 12014 * @see Character#MAX_RADIX 12015 * @see Character#digit(char, int) 12016 */ 12017 public static char forDigit(int digit, int radix) { 12018 if ((digit >= radix) || (digit < 0)) { 12019 return '\0'; 12020 } 12021 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 12022 return '\0'; 12023 } 12024 if (digit < 10) { 12025 return (char)('0' + digit); 12026 } 12027 return (char)('a' - 10 + digit); 12028 } 12029 12030 /** 12031 * Returns the Unicode directionality property for the given 12032 * character. Character directionality is used to calculate the 12033 * visual ordering of text. The directionality value of undefined 12034 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 12035 * 12036 * <p><b>Note:</b> This method cannot handle <a 12037 * href="#supplementary"> supplementary characters</a>. To support 12038 * all Unicode characters, including supplementary characters, use 12039 * the {@link #getDirectionality(int)} method. 12040 * 12041 * @param ch {@code char} for which the directionality property 12042 * is requested. 12043 * @return the directionality property of the {@code char} value. 
12044 * 12045 * @see Character#DIRECTIONALITY_UNDEFINED 12046 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 12047 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 12048 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 12049 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 12050 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 12051 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 12052 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 12053 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 12054 * @see Character#DIRECTIONALITY_NONSPACING_MARK 12055 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 12056 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 12057 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 12058 * @see Character#DIRECTIONALITY_WHITESPACE 12059 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 12060 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 12061 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 12062 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 12063 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 12064 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 12065 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 12066 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 12067 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE 12068 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 12069 * @since 1.4 12070 */ 12071 public static byte getDirectionality(char ch) { 12072 return getDirectionality((int)ch); 12073 } 12074 12075 /** 12076 * Returns the Unicode directionality property for the given 12077 * character (Unicode code point). Character directionality is 12078 * used to calculate the visual ordering of text. The 12079 * directionality value of undefined character is {@link 12080 * #DIRECTIONALITY_UNDEFINED}. 12081 * 12082 * @param codePoint the character (Unicode code point) for which 12083 * the directionality property is requested. 
12084 * @return the directionality property of the character. 12085 * 12086 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 12087 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 12088 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 12089 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 12090 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 12091 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 12092 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 12093 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 12094 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 12095 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 12096 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 12097 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 12098 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 12099 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 12100 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 12101 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 12102 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 12103 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 12104 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 12105 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 12106 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 12107 * @see 
Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 12108 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE 12109 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 12110 * @since 1.5 12111 */ 12112 public static byte getDirectionality(int codePoint) { 12113 return CharacterData.of(codePoint).getDirectionality(codePoint); 12114 } 12115 12116 /** 12117 * Determines whether the character is mirrored according to the 12118 * Unicode specification. Mirrored characters should have their 12119 * glyphs horizontally mirrored when displayed in text that is 12120 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 12121 * PARENTHESIS is semantically defined to be an <i>opening 12122 * parenthesis</i>. This will appear as a "(" in text that is 12123 * left-to-right but as a ")" in text that is right-to-left. 12124 * 12125 * <p><b>Note:</b> This method cannot handle <a 12126 * href="#supplementary"> supplementary characters</a>. To support 12127 * all Unicode characters, including supplementary characters, use 12128 * the {@link #isMirrored(int)} method. 12129 * 12130 * @param ch {@code char} for which the mirrored property is requested 12131 * @return {@code true} if the char is mirrored, {@code false} 12132 * if the {@code char} is not mirrored or is not defined. 12133 * @since 1.4 12134 */ 12135 public static boolean isMirrored(char ch) { 12136 return isMirrored((int)ch); 12137 } 12138 12139 /** 12140 * Determines whether the specified character (Unicode code point) 12141 * is mirrored according to the Unicode specification. Mirrored 12142 * characters should have their glyphs horizontally mirrored when 12143 * displayed in text that is right-to-left. For example, 12144 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 12145 * defined to be an <i>opening parenthesis</i>. 
This will appear 12146 * as a "(" in text that is left-to-right but as a ")" in text 12147 * that is right-to-left. 12148 * 12149 * @param codePoint the character (Unicode code point) to be tested. 12150 * @return {@code true} if the character is mirrored, {@code false} 12151 * if the character is not mirrored or is not defined. 12152 * @since 1.5 12153 */ 12154 public static boolean isMirrored(int codePoint) { 12155 return CharacterData.of(codePoint).isMirrored(codePoint); 12156 } 12157 12158 /** 12159 * Compares two {@code Character} objects numerically. 12160 * 12161 * @param anotherCharacter the {@code Character} to be compared. 12162 * @return the value {@code 0} if the argument {@code Character} 12163 * is equal to this {@code Character}; a value less than 12164 * {@code 0} if this {@code Character} is numerically less 12165 * than the {@code Character} argument; and a value greater than 12166 * {@code 0} if this {@code Character} is numerically greater 12167 * than the {@code Character} argument (unsigned comparison). 12168 * Note that this is strictly a numerical comparison; it is not 12169 * locale-dependent. 12170 * @since 1.2 12171 */ 12172 public int compareTo(Character anotherCharacter) { 12173 return compare(this.value, anotherCharacter.value); 12174 } 12175 12176 /** 12177 * Compares two {@code char} values numerically. 
12178 * The value returned is identical to what would be returned by: 12179 * <pre> 12180 * Character.valueOf(x).compareTo(Character.valueOf(y)) 12181 * </pre> 12182 * 12183 * @param x the first {@code char} to compare 12184 * @param y the second {@code char} to compare 12185 * @return the value {@code 0} if {@code x == y}; 12186 * a value less than {@code 0} if {@code x < y}; and 12187 * a value greater than {@code 0} if {@code x > y} 12188 * @since 1.7 12189 */ 12190 public static int compare(char x, char y) { 12191 return x - y; 12192 } 12193 12194 /** 12195 * Converts the character (Unicode code point) argument to uppercase using 12196 * information from the UnicodeData file. 12197 * 12198 * @param codePoint the character (Unicode code point) to be converted. 12199 * @return either the uppercase equivalent of the character, if 12200 * any, or an error flag ({@code Character.ERROR}) 12201 * that indicates that a 1:M {@code char} mapping exists. 12202 * @see Character#isLowerCase(char) 12203 * @see Character#isUpperCase(char) 12204 * @see Character#toLowerCase(char) 12205 * @see Character#toTitleCase(char) 12206 * @since 1.4 12207 */ 12208 static int toUpperCaseEx(int codePoint) { 12209 assert isValidCodePoint(codePoint); 12210 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 12211 } 12212 12213 /** 12214 * Converts the character (Unicode code point) argument to uppercase using case 12215 * mapping information from the SpecialCasing file in the Unicode 12216 * specification. If a character has no explicit uppercase 12217 * mapping, then the {@code char} itself is returned in the 12218 * {@code char[]}. 12219 * 12220 * @param codePoint the character (Unicode code point) to be converted. 12221 * @return a {@code char[]} with the uppercased character. 12222 * @since 1.4 12223 */ 12224 static char[] toUpperCaseCharArray(int codePoint) { 12225 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 
12226 assert isBmpCodePoint(codePoint); 12227 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 12228 } 12229 12230 /** 12231 * The number of bits used to represent a {@code char} value in unsigned 12232 * binary form, constant {@code 16}. 12233 * 12234 * @since 1.5 12235 */ 12236 public static final int SIZE = 16; 12237 12238 /** 12239 * The number of bytes used to represent a {@code char} value in unsigned 12240 * binary form. 12241 * 12242 * @since 1.8 12243 */ 12244 public static final int BYTES = SIZE / Byte.SIZE; 12245 12246 /** 12247 * Returns the value obtained by reversing the order of the bytes in the 12248 * specified {@code char} value. 12249 * 12250 * @param ch The {@code char} of which to reverse the byte order. 12251 * @return the value obtained by reversing (or, equivalently, swapping) 12252 * the bytes in the specified {@code char} value. 12253 * @since 1.5 12254 */ 12255 @IntrinsicCandidate 12256 public static char reverseBytes(char ch) { 12257 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 12258 } 12259 12260 /** 12261 * Returns the name of the specified character 12262 * {@code codePoint}, or null if the code point is 12263 * {@link #UNASSIGNED unassigned}. 12264 * <p> 12265 * If the specified character is not assigned a name by 12266 * the <i>UnicodeData</i> file (part of the Unicode Character 12267 * Database maintained by the Unicode Consortium), the returned 12268 * name is the same as the result of the expression: 12269 * 12270 * <blockquote>{@code 12271 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 12272 * + " " 12273 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12274 * 12275 * }</blockquote> 12276 * 12277 * For the {@code codePoint}s in the <i>UnicodeData</i> file, the name 12278 * returned by this method follows the naming scheme in the 12279 * "Unicode Name Property" section of the Unicode Standard. 
For other 12280 * code points, such as Hangul/Ideographs, The name generation rule above 12281 * differs from the one defined in the Unicode Standard. 12282 * 12283 * @param codePoint the character (Unicode code point) 12284 * 12285 * @return the name of the specified character, or null if 12286 * the code point is unassigned. 12287 * 12288 * @throws IllegalArgumentException if the specified 12289 * {@code codePoint} is not a valid Unicode 12290 * code point. 12291 * 12292 * @since 1.7 12293 */ 12294 public static String getName(int codePoint) { 12295 if (!isValidCodePoint(codePoint)) { 12296 throw new IllegalArgumentException( 12297 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 12298 } 12299 String name = CharacterName.getInstance().getName(codePoint); 12300 if (name != null) 12301 return name; 12302 if (getType(codePoint) == UNASSIGNED) 12303 return null; 12304 UnicodeBlock block = UnicodeBlock.of(codePoint); 12305 if (block != null) 12306 return block.toString().replace('_', ' ') + " " 12307 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12308 // should never come here 12309 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12310 } 12311 12312 /** 12313 * Returns the code point value of the Unicode character specified by 12314 * the given character name. 12315 * <p> 12316 * If a character is not assigned a name by the <i>UnicodeData</i> 12317 * file (part of the Unicode Character Database maintained by the Unicode 12318 * Consortium), its name is defined as the result of the expression: 12319 * 12320 * <blockquote>{@code 12321 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 12322 * + " " 12323 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12324 * 12325 * }</blockquote> 12326 * <p> 12327 * The {@code name} matching is case insensitive, with any leading and 12328 * trailing whitespace character removed. 
12329 * 12330 * For the code points in the <i>UnicodeData</i> file, this method 12331 * recognizes the name which conforms to the name defined in the 12332 * "Unicode Name Property" section in the Unicode Standard. For other 12333 * code points, this method recognizes the name generated with 12334 * {@link #getName(int)} method. 12335 * 12336 * @param name the character name 12337 * 12338 * @return the code point value of the character specified by its name. 12339 * 12340 * @throws IllegalArgumentException if the specified {@code name} 12341 * is not a valid character name. 12342 * @throws NullPointerException if {@code name} is {@code null} 12343 * 12344 * @since 9 12345 */ 12346 public static int codePointOf(String name) { 12347 name = name.trim().toUpperCase(Locale.ROOT); 12348 int cp = CharacterName.getInstance().getCodePoint(name); 12349 if (cp != -1) 12350 return cp; 12351 try { 12352 int off = name.lastIndexOf(' '); 12353 if (off != -1) { 12354 cp = Integer.parseInt(name, off + 1, name.length(), 16); 12355 if (isValidCodePoint(cp) && name.equals(getName(cp))) 12356 return cp; 12357 } 12358 } catch (Exception x) {} 12359 throw new IllegalArgumentException("Unrecognized character name :" + name); 12360 } 12361 } --- EOF ---