1 /* 2 * Copyright (c) 2002, 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26 package java.lang; 27 28 import jdk.internal.misc.CDS; 29 import jdk.internal.vm.annotation.IntrinsicCandidate; 30 import jdk.internal.vm.annotation.Stable; 31 32 import java.lang.constant.Constable; 33 import java.lang.constant.DynamicConstantDesc; 34 import java.util.Arrays; 35 import java.util.HashMap; 36 import java.util.Locale; 37 import java.util.Map; 38 import java.util.Objects; 39 import java.util.Optional; 40 41 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST; 42 import static java.lang.constant.ConstantDescs.CD_char; 43 import static java.lang.constant.ConstantDescs.DEFAULT_NAME; 44 45 /** 46 * The {@code Character} class is the {@linkplain 47 * java.lang##wrapperClass wrapper class} for values of the primitive 48 * type {@code char}. An object of type {@code Character} contains a 49 * single field whose type is {@code char}. 50 * 51 * <p>In addition, this class provides a large number of static methods for 52 * determining a character's category (lowercase letter, digit, etc.) 53 * and for converting characters from uppercase to lowercase and vice 54 * versa. 55 * 56 * <h2><a id="conformance">Unicode Conformance</a></h2> 57 * <p> 58 * The fields and methods of class {@code Character} are defined in terms 59 * of character information from the Unicode Standard, specifically the 60 * <i>UnicodeData</i> file that is part of the Unicode Character Database. 61 * This file specifies properties including name and category for every 62 * assigned Unicode code point or character range. The file is available 63 * from the Unicode Consortium at 64 * <a href="http://www.unicode.org">http://www.unicode.org</a>. 65 * <p> 66 * Character information is based on the Unicode Standard, version 15.1. 67 * <p> 68 * The Java platform has supported different versions of the Unicode 69 * Standard over time. 
Upgrades to newer versions of the Unicode Standard 70 * occurred in the following Java releases, each indicating the new version: 71 * <table class="striped"> 72 * <caption style="display:none">Shows Java releases and supported Unicode versions</caption> 73 * <thead> 74 * <tr><th scope="col">Java release</th> 75 * <th scope="col">Unicode version</th></tr> 76 * </thead> 77 * <tbody> 78 * <tr><th scope="row" style="text-align:left">Java SE 22</th> 79 * <td>Unicode 15.1</td></tr> 80 * <tr><th scope="row" style="text-align:left">Java SE 20</th> 81 * <td>Unicode 15.0</td></tr> 82 * <tr><th scope="row" style="text-align:left">Java SE 19</th> 83 * <td>Unicode 14.0</td></tr> 84 * <tr><th scope="row" style="text-align:left">Java SE 15</th> 85 * <td>Unicode 13.0</td></tr> 86 * <tr><th scope="row" style="text-align:left">Java SE 13</th> 87 * <td>Unicode 12.1</td></tr> 88 * <tr><th scope="row" style="text-align:left">Java SE 12</th> 89 * <td>Unicode 11.0</td></tr> 90 * <tr><th scope="row" style="text-align:left">Java SE 11</th> 91 * <td>Unicode 10.0</td></tr> 92 * <tr><th scope="row" style="text-align:left">Java SE 9</th> 93 * <td>Unicode 8.0</td></tr> 94 * <tr><th scope="row" style="text-align:left">Java SE 8</th> 95 * <td>Unicode 6.2</td></tr> 96 * <tr><th scope="row" style="text-align:left">Java SE 7</th> 97 * <td>Unicode 6.0</td></tr> 98 * <tr><th scope="row" style="text-align:left">Java SE 5.0</th> 99 * <td>Unicode 4.0</td></tr> 100 * <tr><th scope="row" style="text-align:left">Java SE 1.4</th> 101 * <td>Unicode 3.0</td></tr> 102 * <tr><th scope="row" style="text-align:left">JDK 1.1</th> 103 * <td>Unicode 2.0</td></tr> 104 * <tr><th scope="row" style="text-align:left">JDK 1.0.2</th> 105 * <td>Unicode 1.1.5</td></tr> 106 * </tbody> 107 * </table> 108 * Variations from these base Unicode versions, such as recognized appendixes, 109 * are documented elsewhere. 
110 * <h2><a id="unicode">Unicode Character Representations</a></h2> 111 * 112 * <p>The {@code char} data type (and therefore the value that a 113 * {@code Character} object encapsulates) are based on the 114 * original Unicode specification, which defined characters as 115 * fixed-width 16-bit entities. The Unicode Standard has since been 116 * changed to allow for characters whose representation requires more 117 * than 16 bits. The range of legal <em>code point</em>s is now 118 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 119 * (Refer to the <a 120 * href="http://www.unicode.org/reports/tr27/#notation"><i> 121 * definition</i></a> of the U+<i>n</i> notation in the Unicode 122 * Standard.) 123 * 124 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is 125 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 126 * <a id="supplementary">Characters</a> whose code points are greater 127 * than U+FFFF are called <em>supplementary character</em>s. The Java 128 * platform uses the UTF-16 representation in {@code char} arrays and 129 * in the {@code String} and {@code StringBuffer} classes. In 130 * this representation, supplementary characters are represented as a pair 131 * of {@code char} values, the first from the <em>high-surrogates</em> 132 * range, (\uD800-\uDBFF), the second from the 133 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 134 * 135 * <p>A {@code char} value, therefore, represents Basic 136 * Multilingual Plane (BMP) code points, including the surrogate 137 * code points, or code units of the UTF-16 encoding. An 138 * {@code int} value represents all Unicode code points, 139 * including supplementary code points. The lower (least significant) 140 * 21 bits of {@code int} are used to represent Unicode code 141 * points and the upper (most significant) 11 bits must be zero. 
142 * Unless otherwise specified, the behavior with respect to 143 * supplementary characters and surrogate {@code char} values is 144 * as follows: 145 * 146 * <ul> 147 * <li>The methods that only accept a {@code char} value cannot support 148 * supplementary characters. They treat {@code char} values from the 149 * surrogate ranges as undefined characters. For example, 150 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 151 * this specific value if followed by any low-surrogate value in a string 152 * would represent a letter. 153 * 154 * <li>The methods that accept an {@code int} value support all 155 * Unicode characters, including supplementary characters. For 156 * example, {@code Character.isLetter(0x2F81A)} returns 157 * {@code true} because the code point value represents a letter 158 * (a CJK ideograph). 159 * </ul> 160 * 161 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 162 * used for character values in the range between U+0000 and U+10FFFF, 163 * and <em>Unicode code unit</em> is used for 16-bit 164 * {@code char} values that are code units of the <em>UTF-16</em> 165 * encoding. For more information on Unicode terminology, refer to the 166 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 167 * 168 * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a> 169 * class; programmers should treat instances that are 170 * {@linkplain #equals(Object) equal} as interchangeable and should not 171 * use instances for synchronization, or unpredictable behavior may 172 * occur. For example, in a future release, synchronization may fail. 
173 * 174 * @spec https://www.unicode.org/reports/tr27 Unicode 3.1.0 175 * @author Lee Boynton 176 * @author Guy Steele 177 * @author Akira Tanaka 178 * @author Martin Buchholz 179 * @author Ulf Zibis 180 * @since 1.0 181 */ 182 @jdk.internal.ValueBased 183 public final 184 class Character implements java.io.Serializable, Comparable<Character>, Constable { 185 /** 186 * The minimum radix available for conversion to and from strings. 187 * The constant value of this field is the smallest value permitted 188 * for the radix argument in radix-conversion methods such as the 189 * {@code digit} method, the {@code forDigit} method, and the 190 * {@code toString} method of class {@code Integer}. 191 * 192 * @see Character#digit(char, int) 193 * @see Character#forDigit(int, int) 194 * @see Integer#toString(int, int) 195 * @see Integer#valueOf(String) 196 */ 197 public static final int MIN_RADIX = 2; 198 199 /** 200 * The maximum radix available for conversion to and from strings. 201 * The constant value of this field is the largest value permitted 202 * for the radix argument in radix-conversion methods such as the 203 * {@code digit} method, the {@code forDigit} method, and the 204 * {@code toString} method of class {@code Integer}. 205 * 206 * @see Character#digit(char, int) 207 * @see Character#forDigit(int, int) 208 * @see Integer#toString(int, int) 209 * @see Integer#valueOf(String) 210 */ 211 public static final int MAX_RADIX = 36; 212 213 /** 214 * The constant value of this field is the smallest value of type 215 * {@code char}, {@code '\u005Cu0000'}. 216 * 217 * @since 1.0.2 218 */ 219 public static final char MIN_VALUE = '\u0000'; 220 221 /** 222 * The constant value of this field is the largest value of type 223 * {@code char}, {@code '\u005CuFFFF'}. 224 * 225 * @since 1.0.2 226 */ 227 public static final char MAX_VALUE = '\uFFFF'; 228 229 /** 230 * The {@code Class} instance representing the primitive type 231 * {@code char}. 
232 * 233 * @since 1.1 234 */ 235 public static final Class<Character> TYPE = Class.getPrimitiveClass("char"); 236 237 /* 238 * Normative general types 239 */ 240 241 /* 242 * General character types 243 */ 244 245 /** 246 * General category "Cn" in the Unicode specification. 247 * @since 1.1 248 */ 249 public static final byte UNASSIGNED = 0; 250 251 /** 252 * General category "Lu" in the Unicode specification. 253 * @since 1.1 254 */ 255 public static final byte UPPERCASE_LETTER = 1; 256 257 /** 258 * General category "Ll" in the Unicode specification. 259 * @since 1.1 260 */ 261 public static final byte LOWERCASE_LETTER = 2; 262 263 /** 264 * General category "Lt" in the Unicode specification. 265 * @since 1.1 266 */ 267 public static final byte TITLECASE_LETTER = 3; 268 269 /** 270 * General category "Lm" in the Unicode specification. 271 * @since 1.1 272 */ 273 public static final byte MODIFIER_LETTER = 4; 274 275 /** 276 * General category "Lo" in the Unicode specification. 277 * @since 1.1 278 */ 279 public static final byte OTHER_LETTER = 5; 280 281 /** 282 * General category "Mn" in the Unicode specification. 283 * @since 1.1 284 */ 285 public static final byte NON_SPACING_MARK = 6; 286 287 /** 288 * General category "Me" in the Unicode specification. 289 * @since 1.1 290 */ 291 public static final byte ENCLOSING_MARK = 7; 292 293 /** 294 * General category "Mc" in the Unicode specification. 295 * @since 1.1 296 */ 297 public static final byte COMBINING_SPACING_MARK = 8; 298 299 /** 300 * General category "Nd" in the Unicode specification. 301 * @since 1.1 302 */ 303 public static final byte DECIMAL_DIGIT_NUMBER = 9; 304 305 /** 306 * General category "Nl" in the Unicode specification. 307 * @since 1.1 308 */ 309 public static final byte LETTER_NUMBER = 10; 310 311 /** 312 * General category "No" in the Unicode specification. 
313 * @since 1.1 314 */ 315 public static final byte OTHER_NUMBER = 11; 316 317 /** 318 * General category "Zs" in the Unicode specification. 319 * @since 1.1 320 */ 321 public static final byte SPACE_SEPARATOR = 12; 322 323 /** 324 * General category "Zl" in the Unicode specification. 325 * @since 1.1 326 */ 327 public static final byte LINE_SEPARATOR = 13; 328 329 /** 330 * General category "Zp" in the Unicode specification. 331 * @since 1.1 332 */ 333 public static final byte PARAGRAPH_SEPARATOR = 14; 334 335 /** 336 * General category "Cc" in the Unicode specification. 337 * @since 1.1 338 */ 339 public static final byte CONTROL = 15; 340 341 /** 342 * General category "Cf" in the Unicode specification. 343 * @since 1.1 344 */ 345 public static final byte FORMAT = 16; 346 347 /** 348 * General category "Co" in the Unicode specification. 349 * @since 1.1 350 */ 351 public static final byte PRIVATE_USE = 18; 352 353 /** 354 * General category "Cs" in the Unicode specification. 355 * @since 1.1 356 */ 357 public static final byte SURROGATE = 19; 358 359 /** 360 * General category "Pd" in the Unicode specification. 361 * @since 1.1 362 */ 363 public static final byte DASH_PUNCTUATION = 20; 364 365 /** 366 * General category "Ps" in the Unicode specification. 367 * @since 1.1 368 */ 369 public static final byte START_PUNCTUATION = 21; 370 371 /** 372 * General category "Pe" in the Unicode specification. 373 * @since 1.1 374 */ 375 public static final byte END_PUNCTUATION = 22; 376 377 /** 378 * General category "Pc" in the Unicode specification. 379 * @since 1.1 380 */ 381 public static final byte CONNECTOR_PUNCTUATION = 23; 382 383 /** 384 * General category "Po" in the Unicode specification. 385 * @since 1.1 386 */ 387 public static final byte OTHER_PUNCTUATION = 24; 388 389 /** 390 * General category "Sm" in the Unicode specification. 
391 * @since 1.1 392 */ 393 public static final byte MATH_SYMBOL = 25; 394 395 /** 396 * General category "Sc" in the Unicode specification. 397 * @since 1.1 398 */ 399 public static final byte CURRENCY_SYMBOL = 26; 400 401 /** 402 * General category "Sk" in the Unicode specification. 403 * @since 1.1 404 */ 405 public static final byte MODIFIER_SYMBOL = 27; 406 407 /** 408 * General category "So" in the Unicode specification. 409 * @since 1.1 410 */ 411 public static final byte OTHER_SYMBOL = 28; 412 413 /** 414 * General category "Pi" in the Unicode specification. 415 * @since 1.4 416 */ 417 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 418 419 /** 420 * General category "Pf" in the Unicode specification. 421 * @since 1.4 422 */ 423 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 424 425 /** 426 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 427 */ 428 static final int ERROR = 0xFFFFFFFF; 429 430 431 /** 432 * Undefined bidirectional character type. Undefined {@code char} 433 * values have undefined directionality in the Unicode specification. 434 * @since 1.4 435 */ 436 public static final byte DIRECTIONALITY_UNDEFINED = -1; 437 438 /** 439 * Strong bidirectional character type "L" in the Unicode specification. 440 * @since 1.4 441 */ 442 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 443 444 /** 445 * Strong bidirectional character type "R" in the Unicode specification. 446 * @since 1.4 447 */ 448 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 449 450 /** 451 * Strong bidirectional character type "AL" in the Unicode specification. 452 * @since 1.4 453 */ 454 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 455 456 /** 457 * Weak bidirectional character type "EN" in the Unicode specification. 458 * @since 1.4 459 */ 460 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 461 462 /** 463 * Weak bidirectional character type "ES" in the Unicode specification. 
464 * @since 1.4 465 */ 466 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 467 468 /** 469 * Weak bidirectional character type "ET" in the Unicode specification. 470 * @since 1.4 471 */ 472 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 473 474 /** 475 * Weak bidirectional character type "AN" in the Unicode specification. 476 * @since 1.4 477 */ 478 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 479 480 /** 481 * Weak bidirectional character type "CS" in the Unicode specification. 482 * @since 1.4 483 */ 484 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 485 486 /** 487 * Weak bidirectional character type "NSM" in the Unicode specification. 488 * @since 1.4 489 */ 490 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 491 492 /** 493 * Weak bidirectional character type "BN" in the Unicode specification. 494 * @since 1.4 495 */ 496 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 497 498 /** 499 * Neutral bidirectional character type "B" in the Unicode specification. 500 * @since 1.4 501 */ 502 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 503 504 /** 505 * Neutral bidirectional character type "S" in the Unicode specification. 506 * @since 1.4 507 */ 508 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 509 510 /** 511 * Neutral bidirectional character type "WS" in the Unicode specification. 512 * @since 1.4 513 */ 514 public static final byte DIRECTIONALITY_WHITESPACE = 12; 515 516 /** 517 * Neutral bidirectional character type "ON" in the Unicode specification. 518 * @since 1.4 519 */ 520 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 521 522 /** 523 * Strong bidirectional character type "LRE" in the Unicode specification. 524 * @since 1.4 525 */ 526 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 527 528 /** 529 * Strong bidirectional character type "LRO" in the Unicode specification. 
530 * @since 1.4 531 */ 532 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 533 534 /** 535 * Strong bidirectional character type "RLE" in the Unicode specification. 536 * @since 1.4 537 */ 538 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 539 540 /** 541 * Strong bidirectional character type "RLO" in the Unicode specification. 542 * @since 1.4 543 */ 544 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 545 546 /** 547 * Weak bidirectional character type "PDF" in the Unicode specification. 548 * @since 1.4 549 */ 550 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 551 552 /** 553 * Weak bidirectional character type "LRI" in the Unicode specification. 554 * @since 9 555 */ 556 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19; 557 558 /** 559 * Weak bidirectional character type "RLI" in the Unicode specification. 560 * @since 9 561 */ 562 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20; 563 564 /** 565 * Weak bidirectional character type "FSI" in the Unicode specification. 566 * @since 9 567 */ 568 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21; 569 570 /** 571 * Weak bidirectional character type "PDI" in the Unicode specification. 572 * @since 9 573 */ 574 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22; 575 576 /** 577 * The minimum value of a 578 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 579 * Unicode high-surrogate code unit</a> 580 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 581 * A high-surrogate is also known as a <i>leading-surrogate</i>. 582 * 583 * @since 1.5 584 */ 585 public static final char MIN_HIGH_SURROGATE = '\uD800'; 586 587 /** 588 * The maximum value of a 589 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 590 * Unicode high-surrogate code unit</a> 591 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 
592 * A high-surrogate is also known as a <i>leading-surrogate</i>. 593 * 594 * @since 1.5 595 */ 596 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 597 598 /** 599 * The minimum value of a 600 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 601 * Unicode low-surrogate code unit</a> 602 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 603 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 604 * 605 * @since 1.5 606 */ 607 public static final char MIN_LOW_SURROGATE = '\uDC00'; 608 609 /** 610 * The maximum value of a 611 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 612 * Unicode low-surrogate code unit</a> 613 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 614 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 615 * 616 * @since 1.5 617 */ 618 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 619 620 /** 621 * The minimum value of a Unicode surrogate code unit in the 622 * UTF-16 encoding, constant {@code '\u005CuD800'}. 623 * 624 * @since 1.5 625 */ 626 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 627 628 /** 629 * The maximum value of a Unicode surrogate code unit in the 630 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 631 * 632 * @since 1.5 633 */ 634 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 635 636 /** 637 * The minimum value of a 638 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 639 * Unicode supplementary code point</a>, constant {@code U+10000}. 640 * 641 * @since 1.5 642 */ 643 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 644 645 /** 646 * The minimum value of a 647 * <a href="http://www.unicode.org/glossary/#code_point"> 648 * Unicode code point</a>, constant {@code U+0000}. 
649 * 650 * @since 1.5 651 */ 652 public static final int MIN_CODE_POINT = 0x000000; 653 654 /** 655 * The maximum value of a 656 * <a href="http://www.unicode.org/glossary/#code_point"> 657 * Unicode code point</a>, constant {@code U+10FFFF}. 658 * 659 * @since 1.5 660 */ 661 public static final int MAX_CODE_POINT = 0X10FFFF; 662 663 /** 664 * Returns an {@link Optional} containing the nominal descriptor for this 665 * instance. 666 * 667 * @return an {@link Optional} describing the {@linkplain Character} instance 668 * @since 15 669 */ 670 @Override 671 public Optional<DynamicConstantDesc<Character>> describeConstable() { 672 return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value)); 673 } 674 675 /** 676 * Instances of this class represent particular subsets of the Unicode 677 * character set. The only family of subsets defined in the 678 * {@code Character} class is {@link Character.UnicodeBlock}. 679 * Other portions of the Java API may define other subsets for their 680 * own purposes. 681 * 682 * @since 1.2 683 */ 684 public static class Subset { 685 686 private String name; 687 688 /** 689 * Constructs a new {@code Subset} instance. 690 * 691 * @param name The name of this subset 692 * @throws NullPointerException if name is {@code null} 693 */ 694 protected Subset(String name) { 695 if (name == null) { 696 throw new NullPointerException("name"); 697 } 698 this.name = name; 699 } 700 701 /** 702 * Compares two {@code Subset} objects for equality. 703 * This method returns {@code true} if and only if 704 * {@code this} and the argument refer to the same 705 * object; since this method is {@code final}, this 706 * guarantee holds for all subclasses. 707 */ 708 public final boolean equals(Object obj) { 709 return (this == obj); 710 } 711 712 /** 713 * Returns the standard hash code as defined by the 714 * {@link Object#hashCode} method. 
This method 715 * is {@code final} in order to ensure that the 716 * {@code equals} and {@code hashCode} methods will 717 * be consistent in all subclasses. 718 */ 719 public final int hashCode() { 720 return super.hashCode(); 721 } 722 723 /** 724 * Returns the name of this subset. 725 */ 726 public final String toString() { 727 return name; 728 } 729 } 730 731 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 732 // for the latest specification of Unicode Blocks. 733 734 /** 735 * A family of character subsets representing the character blocks in the 736 * Unicode specification. Character blocks generally define characters 737 * used for a specific script or purpose. A character is contained by 738 * at most one Unicode block. 739 * 740 * @since 1.2 741 */ 742 public static final class UnicodeBlock extends Subset { 743 /** 744 * NUM_ENTITIES should match the total number of UnicodeBlocks. 745 * It should be adjusted whenever the Unicode Character Database 746 * is upgraded. 747 */ 748 private static final int NUM_ENTITIES = 759; 749 private static Map<String, UnicodeBlock> map = HashMap.newHashMap(NUM_ENTITIES); 750 751 /** 752 * Creates a UnicodeBlock with the given identifier name. 753 * This name must be the same as the block identifier. 754 */ 755 private UnicodeBlock(String idName) { 756 super(idName); 757 map.put(idName, this); 758 } 759 760 /** 761 * Creates a UnicodeBlock with the given identifier name and 762 * alias name. 763 */ 764 private UnicodeBlock(String idName, String alias) { 765 this(idName); 766 map.put(alias, this); 767 } 768 769 /** 770 * Creates a UnicodeBlock with the given identifier name and 771 * alias names. 772 */ 773 private UnicodeBlock(String idName, String... aliases) { 774 this(idName); 775 for (String alias : aliases) 776 map.put(alias, this); 777 } 778 779 /** 780 * Constant for the "Basic Latin" Unicode character block. 
781 * @since 1.2 782 */ 783 public static final UnicodeBlock BASIC_LATIN = 784 new UnicodeBlock("BASIC_LATIN", 785 "BASIC LATIN", 786 "BASICLATIN"); 787 788 /** 789 * Constant for the "Latin-1 Supplement" Unicode character block. 790 * @since 1.2 791 */ 792 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 793 new UnicodeBlock("LATIN_1_SUPPLEMENT", 794 "LATIN-1 SUPPLEMENT", 795 "LATIN-1SUPPLEMENT"); 796 797 /** 798 * Constant for the "Latin Extended-A" Unicode character block. 799 * @since 1.2 800 */ 801 public static final UnicodeBlock LATIN_EXTENDED_A = 802 new UnicodeBlock("LATIN_EXTENDED_A", 803 "LATIN EXTENDED-A", 804 "LATINEXTENDED-A"); 805 806 /** 807 * Constant for the "Latin Extended-B" Unicode character block. 808 * @since 1.2 809 */ 810 public static final UnicodeBlock LATIN_EXTENDED_B = 811 new UnicodeBlock("LATIN_EXTENDED_B", 812 "LATIN EXTENDED-B", 813 "LATINEXTENDED-B"); 814 815 /** 816 * Constant for the "IPA Extensions" Unicode character block. 817 * @since 1.2 818 */ 819 public static final UnicodeBlock IPA_EXTENSIONS = 820 new UnicodeBlock("IPA_EXTENSIONS", 821 "IPA EXTENSIONS", 822 "IPAEXTENSIONS"); 823 824 /** 825 * Constant for the "Spacing Modifier Letters" Unicode character block. 826 * @since 1.2 827 */ 828 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 829 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 830 "SPACING MODIFIER LETTERS", 831 "SPACINGMODIFIERLETTERS"); 832 833 /** 834 * Constant for the "Combining Diacritical Marks" Unicode character block. 835 * @since 1.2 836 */ 837 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 838 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 839 "COMBINING DIACRITICAL MARKS", 840 "COMBININGDIACRITICALMARKS"); 841 842 /** 843 * Constant for the "Greek and Coptic" Unicode character block. 844 * <p> 845 * This block was previously known as the "Greek" block. 
846 * 847 * @since 1.2 848 */ 849 public static final UnicodeBlock GREEK = 850 new UnicodeBlock("GREEK", 851 "GREEK AND COPTIC", 852 "GREEKANDCOPTIC"); 853 854 /** 855 * Constant for the "Cyrillic" Unicode character block. 856 * @since 1.2 857 */ 858 public static final UnicodeBlock CYRILLIC = 859 new UnicodeBlock("CYRILLIC"); 860 861 /** 862 * Constant for the "Armenian" Unicode character block. 863 * @since 1.2 864 */ 865 public static final UnicodeBlock ARMENIAN = 866 new UnicodeBlock("ARMENIAN"); 867 868 /** 869 * Constant for the "Hebrew" Unicode character block. 870 * @since 1.2 871 */ 872 public static final UnicodeBlock HEBREW = 873 new UnicodeBlock("HEBREW"); 874 875 /** 876 * Constant for the "Arabic" Unicode character block. 877 * @since 1.2 878 */ 879 public static final UnicodeBlock ARABIC = 880 new UnicodeBlock("ARABIC"); 881 882 /** 883 * Constant for the "Devanagari" Unicode character block. 884 * @since 1.2 885 */ 886 public static final UnicodeBlock DEVANAGARI = 887 new UnicodeBlock("DEVANAGARI"); 888 889 /** 890 * Constant for the "Bengali" Unicode character block. 891 * @since 1.2 892 */ 893 public static final UnicodeBlock BENGALI = 894 new UnicodeBlock("BENGALI"); 895 896 /** 897 * Constant for the "Gurmukhi" Unicode character block. 898 * @since 1.2 899 */ 900 public static final UnicodeBlock GURMUKHI = 901 new UnicodeBlock("GURMUKHI"); 902 903 /** 904 * Constant for the "Gujarati" Unicode character block. 905 * @since 1.2 906 */ 907 public static final UnicodeBlock GUJARATI = 908 new UnicodeBlock("GUJARATI"); 909 910 /** 911 * Constant for the "Oriya" Unicode character block. 912 * @since 1.2 913 */ 914 public static final UnicodeBlock ORIYA = 915 new UnicodeBlock("ORIYA"); 916 917 /** 918 * Constant for the "Tamil" Unicode character block. 919 * @since 1.2 920 */ 921 public static final UnicodeBlock TAMIL = 922 new UnicodeBlock("TAMIL"); 923 924 /** 925 * Constant for the "Telugu" Unicode character block. 
926 * @since 1.2 927 */ 928 public static final UnicodeBlock TELUGU = 929 new UnicodeBlock("TELUGU"); 930 931 /** 932 * Constant for the "Kannada" Unicode character block. 933 * @since 1.2 934 */ 935 public static final UnicodeBlock KANNADA = 936 new UnicodeBlock("KANNADA"); 937 938 /** 939 * Constant for the "Malayalam" Unicode character block. 940 * @since 1.2 941 */ 942 public static final UnicodeBlock MALAYALAM = 943 new UnicodeBlock("MALAYALAM"); 944 945 /** 946 * Constant for the "Thai" Unicode character block. 947 * @since 1.2 948 */ 949 public static final UnicodeBlock THAI = 950 new UnicodeBlock("THAI"); 951 952 /** 953 * Constant for the "Lao" Unicode character block. 954 * @since 1.2 955 */ 956 public static final UnicodeBlock LAO = 957 new UnicodeBlock("LAO"); 958 959 /** 960 * Constant for the "Tibetan" Unicode character block. 961 * @since 1.2 962 */ 963 public static final UnicodeBlock TIBETAN = 964 new UnicodeBlock("TIBETAN"); 965 966 /** 967 * Constant for the "Georgian" Unicode character block. 968 * @since 1.2 969 */ 970 public static final UnicodeBlock GEORGIAN = 971 new UnicodeBlock("GEORGIAN"); 972 973 /** 974 * Constant for the "Hangul Jamo" Unicode character block. 975 * @since 1.2 976 */ 977 public static final UnicodeBlock HANGUL_JAMO = 978 new UnicodeBlock("HANGUL_JAMO", 979 "HANGUL JAMO", 980 "HANGULJAMO"); 981 982 /** 983 * Constant for the "Latin Extended Additional" Unicode character block. 984 * @since 1.2 985 */ 986 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 987 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 988 "LATIN EXTENDED ADDITIONAL", 989 "LATINEXTENDEDADDITIONAL"); 990 991 /** 992 * Constant for the "Greek Extended" Unicode character block. 993 * @since 1.2 994 */ 995 public static final UnicodeBlock GREEK_EXTENDED = 996 new UnicodeBlock("GREEK_EXTENDED", 997 "GREEK EXTENDED", 998 "GREEKEXTENDED"); 999 1000 /** 1001 * Constant for the "General Punctuation" Unicode character block. 
1002 * @since 1.2 1003 */ 1004 public static final UnicodeBlock GENERAL_PUNCTUATION = 1005 new UnicodeBlock("GENERAL_PUNCTUATION", 1006 "GENERAL PUNCTUATION", 1007 "GENERALPUNCTUATION"); 1008 1009 /** 1010 * Constant for the "Superscripts and Subscripts" Unicode character 1011 * block. 1012 * @since 1.2 1013 */ 1014 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 1015 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 1016 "SUPERSCRIPTS AND SUBSCRIPTS", 1017 "SUPERSCRIPTSANDSUBSCRIPTS"); 1018 1019 /** 1020 * Constant for the "Currency Symbols" Unicode character block. 1021 * @since 1.2 1022 */ 1023 public static final UnicodeBlock CURRENCY_SYMBOLS = 1024 new UnicodeBlock("CURRENCY_SYMBOLS", 1025 "CURRENCY SYMBOLS", 1026 "CURRENCYSYMBOLS"); 1027 1028 /** 1029 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 1030 * character block. 1031 * <p> 1032 * This block was previously known as "Combining Marks for Symbols". 1033 * @since 1.2 1034 */ 1035 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 1036 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 1037 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 1038 "COMBININGDIACRITICALMARKSFORSYMBOLS", 1039 "COMBINING MARKS FOR SYMBOLS", 1040 "COMBININGMARKSFORSYMBOLS"); 1041 1042 /** 1043 * Constant for the "Letterlike Symbols" Unicode character block. 1044 * @since 1.2 1045 */ 1046 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 1047 new UnicodeBlock("LETTERLIKE_SYMBOLS", 1048 "LETTERLIKE SYMBOLS", 1049 "LETTERLIKESYMBOLS"); 1050 1051 /** 1052 * Constant for the "Number Forms" Unicode character block. 1053 * @since 1.2 1054 */ 1055 public static final UnicodeBlock NUMBER_FORMS = 1056 new UnicodeBlock("NUMBER_FORMS", 1057 "NUMBER FORMS", 1058 "NUMBERFORMS"); 1059 1060 /** 1061 * Constant for the "Arrows" Unicode character block. 
1062 * @since 1.2 1063 */ 1064 public static final UnicodeBlock ARROWS = 1065 new UnicodeBlock("ARROWS"); 1066 1067 /** 1068 * Constant for the "Mathematical Operators" Unicode character block. 1069 * @since 1.2 1070 */ 1071 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 1072 new UnicodeBlock("MATHEMATICAL_OPERATORS", 1073 "MATHEMATICAL OPERATORS", 1074 "MATHEMATICALOPERATORS"); 1075 1076 /** 1077 * Constant for the "Miscellaneous Technical" Unicode character block. 1078 * @since 1.2 1079 */ 1080 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 1081 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 1082 "MISCELLANEOUS TECHNICAL", 1083 "MISCELLANEOUSTECHNICAL"); 1084 1085 /** 1086 * Constant for the "Control Pictures" Unicode character block. 1087 * @since 1.2 1088 */ 1089 public static final UnicodeBlock CONTROL_PICTURES = 1090 new UnicodeBlock("CONTROL_PICTURES", 1091 "CONTROL PICTURES", 1092 "CONTROLPICTURES"); 1093 1094 /** 1095 * Constant for the "Optical Character Recognition" Unicode character block. 1096 * @since 1.2 1097 */ 1098 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1099 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1100 "OPTICAL CHARACTER RECOGNITION", 1101 "OPTICALCHARACTERRECOGNITION"); 1102 1103 /** 1104 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1105 * @since 1.2 1106 */ 1107 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1108 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1109 "ENCLOSED ALPHANUMERICS", 1110 "ENCLOSEDALPHANUMERICS"); 1111 1112 /** 1113 * Constant for the "Box Drawing" Unicode character block. 1114 * @since 1.2 1115 */ 1116 public static final UnicodeBlock BOX_DRAWING = 1117 new UnicodeBlock("BOX_DRAWING", 1118 "BOX DRAWING", 1119 "BOXDRAWING"); 1120 1121 /** 1122 * Constant for the "Block Elements" Unicode character block. 
1123 * @since 1.2 1124 */ 1125 public static final UnicodeBlock BLOCK_ELEMENTS = 1126 new UnicodeBlock("BLOCK_ELEMENTS", 1127 "BLOCK ELEMENTS", 1128 "BLOCKELEMENTS"); 1129 1130 /** 1131 * Constant for the "Geometric Shapes" Unicode character block. 1132 * @since 1.2 1133 */ 1134 public static final UnicodeBlock GEOMETRIC_SHAPES = 1135 new UnicodeBlock("GEOMETRIC_SHAPES", 1136 "GEOMETRIC SHAPES", 1137 "GEOMETRICSHAPES"); 1138 1139 /** 1140 * Constant for the "Miscellaneous Symbols" Unicode character block. 1141 * @since 1.2 1142 */ 1143 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1144 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1145 "MISCELLANEOUS SYMBOLS", 1146 "MISCELLANEOUSSYMBOLS"); 1147 1148 /** 1149 * Constant for the "Dingbats" Unicode character block. 1150 * @since 1.2 1151 */ 1152 public static final UnicodeBlock DINGBATS = 1153 new UnicodeBlock("DINGBATS"); 1154 1155 /** 1156 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 1157 * @since 1.2 1158 */ 1159 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1160 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1161 "CJK SYMBOLS AND PUNCTUATION", 1162 "CJKSYMBOLSANDPUNCTUATION"); 1163 1164 /** 1165 * Constant for the "Hiragana" Unicode character block. 1166 * @since 1.2 1167 */ 1168 public static final UnicodeBlock HIRAGANA = 1169 new UnicodeBlock("HIRAGANA"); 1170 1171 /** 1172 * Constant for the "Katakana" Unicode character block. 1173 * @since 1.2 1174 */ 1175 public static final UnicodeBlock KATAKANA = 1176 new UnicodeBlock("KATAKANA"); 1177 1178 /** 1179 * Constant for the "Bopomofo" Unicode character block. 1180 * @since 1.2 1181 */ 1182 public static final UnicodeBlock BOPOMOFO = 1183 new UnicodeBlock("BOPOMOFO"); 1184 1185 /** 1186 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 
1187 * @since 1.2 1188 */ 1189 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1190 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1191 "HANGUL COMPATIBILITY JAMO", 1192 "HANGULCOMPATIBILITYJAMO"); 1193 1194 /** 1195 * Constant for the "Kanbun" Unicode character block. 1196 * @since 1.2 1197 */ 1198 public static final UnicodeBlock KANBUN = 1199 new UnicodeBlock("KANBUN"); 1200 1201 /** 1202 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 1203 * @since 1.2 1204 */ 1205 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1206 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1207 "ENCLOSED CJK LETTERS AND MONTHS", 1208 "ENCLOSEDCJKLETTERSANDMONTHS"); 1209 1210 /** 1211 * Constant for the "CJK Compatibility" Unicode character block. 1212 * @since 1.2 1213 */ 1214 public static final UnicodeBlock CJK_COMPATIBILITY = 1215 new UnicodeBlock("CJK_COMPATIBILITY", 1216 "CJK COMPATIBILITY", 1217 "CJKCOMPATIBILITY"); 1218 1219 /** 1220 * Constant for the "CJK Unified Ideographs" Unicode character block. 1221 * @since 1.2 1222 */ 1223 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1224 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1225 "CJK UNIFIED IDEOGRAPHS", 1226 "CJKUNIFIEDIDEOGRAPHS"); 1227 1228 /** 1229 * Constant for the "Hangul Syllables" Unicode character block. 1230 * @since 1.2 1231 */ 1232 public static final UnicodeBlock HANGUL_SYLLABLES = 1233 new UnicodeBlock("HANGUL_SYLLABLES", 1234 "HANGUL SYLLABLES", 1235 "HANGULSYLLABLES"); 1236 1237 /** 1238 * Constant for the "Private Use Area" Unicode character block. 1239 * @since 1.2 1240 */ 1241 public static final UnicodeBlock PRIVATE_USE_AREA = 1242 new UnicodeBlock("PRIVATE_USE_AREA", 1243 "PRIVATE USE AREA", 1244 "PRIVATEUSEAREA"); 1245 1246 /** 1247 * Constant for the "CJK Compatibility Ideographs" Unicode character 1248 * block. 
1249 * @since 1.2 1250 */ 1251 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1252 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1253 "CJK COMPATIBILITY IDEOGRAPHS", 1254 "CJKCOMPATIBILITYIDEOGRAPHS"); 1255 1256 /** 1257 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 1258 * @since 1.2 1259 */ 1260 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1261 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1262 "ALPHABETIC PRESENTATION FORMS", 1263 "ALPHABETICPRESENTATIONFORMS"); 1264 1265 /** 1266 * Constant for the "Arabic Presentation Forms-A" Unicode character 1267 * block. 1268 * @since 1.2 1269 */ 1270 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1271 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1272 "ARABIC PRESENTATION FORMS-A", 1273 "ARABICPRESENTATIONFORMS-A"); 1274 1275 /** 1276 * Constant for the "Combining Half Marks" Unicode character block. 1277 * @since 1.2 1278 */ 1279 public static final UnicodeBlock COMBINING_HALF_MARKS = 1280 new UnicodeBlock("COMBINING_HALF_MARKS", 1281 "COMBINING HALF MARKS", 1282 "COMBININGHALFMARKS"); 1283 1284 /** 1285 * Constant for the "CJK Compatibility Forms" Unicode character block. 1286 * @since 1.2 1287 */ 1288 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1289 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1290 "CJK COMPATIBILITY FORMS", 1291 "CJKCOMPATIBILITYFORMS"); 1292 1293 /** 1294 * Constant for the "Small Form Variants" Unicode character block. 1295 * @since 1.2 1296 */ 1297 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1298 new UnicodeBlock("SMALL_FORM_VARIANTS", 1299 "SMALL FORM VARIANTS", 1300 "SMALLFORMVARIANTS"); 1301 1302 /** 1303 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 
1304 * @since 1.2 1305 */ 1306 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1307 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1308 "ARABIC PRESENTATION FORMS-B", 1309 "ARABICPRESENTATIONFORMS-B"); 1310 1311 /** 1312 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1313 * block. 1314 * @since 1.2 1315 */ 1316 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1317 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1318 "HALFWIDTH AND FULLWIDTH FORMS", 1319 "HALFWIDTHANDFULLWIDTHFORMS"); 1320 1321 /** 1322 * Constant for the "Specials" Unicode character block. 1323 * @since 1.2 1324 */ 1325 public static final UnicodeBlock SPECIALS = 1326 new UnicodeBlock("SPECIALS"); 1327 1328 /** 1329 * @deprecated 1330 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES}, 1331 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}. 1332 * These constants match the block definitions of the Unicode Standard. 1333 * The {@link #of(char)} and {@link #of(int)} methods return the 1334 * standard constants. 1335 */ 1336 @Deprecated(since="1.5") 1337 public static final UnicodeBlock SURROGATES_AREA = 1338 new UnicodeBlock("SURROGATES_AREA"); 1339 1340 /** 1341 * Constant for the "Syriac" Unicode character block. 1342 * @since 1.4 1343 */ 1344 public static final UnicodeBlock SYRIAC = 1345 new UnicodeBlock("SYRIAC"); 1346 1347 /** 1348 * Constant for the "Thaana" Unicode character block. 1349 * @since 1.4 1350 */ 1351 public static final UnicodeBlock THAANA = 1352 new UnicodeBlock("THAANA"); 1353 1354 /** 1355 * Constant for the "Sinhala" Unicode character block. 1356 * @since 1.4 1357 */ 1358 public static final UnicodeBlock SINHALA = 1359 new UnicodeBlock("SINHALA"); 1360 1361 /** 1362 * Constant for the "Myanmar" Unicode character block. 
1363 * @since 1.4 1364 */ 1365 public static final UnicodeBlock MYANMAR = 1366 new UnicodeBlock("MYANMAR"); 1367 1368 /** 1369 * Constant for the "Ethiopic" Unicode character block. 1370 * @since 1.4 1371 */ 1372 public static final UnicodeBlock ETHIOPIC = 1373 new UnicodeBlock("ETHIOPIC"); 1374 1375 /** 1376 * Constant for the "Cherokee" Unicode character block. 1377 * @since 1.4 1378 */ 1379 public static final UnicodeBlock CHEROKEE = 1380 new UnicodeBlock("CHEROKEE"); 1381 1382 /** 1383 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1384 * @since 1.4 1385 */ 1386 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1387 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1388 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1389 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1390 1391 /** 1392 * Constant for the "Ogham" Unicode character block. 1393 * @since 1.4 1394 */ 1395 public static final UnicodeBlock OGHAM = 1396 new UnicodeBlock("OGHAM"); 1397 1398 /** 1399 * Constant for the "Runic" Unicode character block. 1400 * @since 1.4 1401 */ 1402 public static final UnicodeBlock RUNIC = 1403 new UnicodeBlock("RUNIC"); 1404 1405 /** 1406 * Constant for the "Khmer" Unicode character block. 1407 * @since 1.4 1408 */ 1409 public static final UnicodeBlock KHMER = 1410 new UnicodeBlock("KHMER"); 1411 1412 /** 1413 * Constant for the "Mongolian" Unicode character block. 1414 * @since 1.4 1415 */ 1416 public static final UnicodeBlock MONGOLIAN = 1417 new UnicodeBlock("MONGOLIAN"); 1418 1419 /** 1420 * Constant for the "Braille Patterns" Unicode character block. 1421 * @since 1.4 1422 */ 1423 public static final UnicodeBlock BRAILLE_PATTERNS = 1424 new UnicodeBlock("BRAILLE_PATTERNS", 1425 "BRAILLE PATTERNS", 1426 "BRAILLEPATTERNS"); 1427 1428 /** 1429 * Constant for the "CJK Radicals Supplement" Unicode character block. 
1430 * @since 1.4 1431 */ 1432 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1433 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1434 "CJK RADICALS SUPPLEMENT", 1435 "CJKRADICALSSUPPLEMENT"); 1436 1437 /** 1438 * Constant for the "Kangxi Radicals" Unicode character block. 1439 * @since 1.4 1440 */ 1441 public static final UnicodeBlock KANGXI_RADICALS = 1442 new UnicodeBlock("KANGXI_RADICALS", 1443 "KANGXI RADICALS", 1444 "KANGXIRADICALS"); 1445 1446 /** 1447 * Constant for the "Ideographic Description Characters" Unicode character block. 1448 * @since 1.4 1449 */ 1450 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1451 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1452 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1453 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1454 1455 /** 1456 * Constant for the "Bopomofo Extended" Unicode character block. 1457 * @since 1.4 1458 */ 1459 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1460 new UnicodeBlock("BOPOMOFO_EXTENDED", 1461 "BOPOMOFO EXTENDED", 1462 "BOPOMOFOEXTENDED"); 1463 1464 /** 1465 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 1466 * @since 1.4 1467 */ 1468 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1469 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1470 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1471 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1472 1473 /** 1474 * Constant for the "Yi Syllables" Unicode character block. 1475 * @since 1.4 1476 */ 1477 public static final UnicodeBlock YI_SYLLABLES = 1478 new UnicodeBlock("YI_SYLLABLES", 1479 "YI SYLLABLES", 1480 "YISYLLABLES"); 1481 1482 /** 1483 * Constant for the "Yi Radicals" Unicode character block. 1484 * @since 1.4 1485 */ 1486 public static final UnicodeBlock YI_RADICALS = 1487 new UnicodeBlock("YI_RADICALS", 1488 "YI RADICALS", 1489 "YIRADICALS"); 1490 1491 /** 1492 * Constant for the "Cyrillic Supplement" Unicode character block. 
1493 * This block was previously known as the "Cyrillic Supplementary" block. 1494 * @since 1.5 1495 */ 1496 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1497 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1498 "CYRILLIC SUPPLEMENTARY", 1499 "CYRILLICSUPPLEMENTARY", 1500 "CYRILLIC SUPPLEMENT", 1501 "CYRILLICSUPPLEMENT"); 1502 1503 /** 1504 * Constant for the "Tagalog" Unicode character block. 1505 * @since 1.5 1506 */ 1507 public static final UnicodeBlock TAGALOG = 1508 new UnicodeBlock("TAGALOG"); 1509 1510 /** 1511 * Constant for the "Hanunoo" Unicode character block. 1512 * @since 1.5 1513 */ 1514 public static final UnicodeBlock HANUNOO = 1515 new UnicodeBlock("HANUNOO"); 1516 1517 /** 1518 * Constant for the "Buhid" Unicode character block. 1519 * @since 1.5 1520 */ 1521 public static final UnicodeBlock BUHID = 1522 new UnicodeBlock("BUHID"); 1523 1524 /** 1525 * Constant for the "Tagbanwa" Unicode character block. 1526 * @since 1.5 1527 */ 1528 public static final UnicodeBlock TAGBANWA = 1529 new UnicodeBlock("TAGBANWA"); 1530 1531 /** 1532 * Constant for the "Limbu" Unicode character block. 1533 * @since 1.5 1534 */ 1535 public static final UnicodeBlock LIMBU = 1536 new UnicodeBlock("LIMBU"); 1537 1538 /** 1539 * Constant for the "Tai Le" Unicode character block. 1540 * @since 1.5 1541 */ 1542 public static final UnicodeBlock TAI_LE = 1543 new UnicodeBlock("TAI_LE", 1544 "TAI LE", 1545 "TAILE"); 1546 1547 /** 1548 * Constant for the "Khmer Symbols" Unicode character block. 1549 * @since 1.5 1550 */ 1551 public static final UnicodeBlock KHMER_SYMBOLS = 1552 new UnicodeBlock("KHMER_SYMBOLS", 1553 "KHMER SYMBOLS", 1554 "KHMERSYMBOLS"); 1555 1556 /** 1557 * Constant for the "Phonetic Extensions" Unicode character block. 
1558 * @since 1.5 1559 */ 1560 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1561 new UnicodeBlock("PHONETIC_EXTENSIONS", 1562 "PHONETIC EXTENSIONS", 1563 "PHONETICEXTENSIONS"); 1564 1565 /** 1566 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 1567 * @since 1.5 1568 */ 1569 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1570 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1571 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1572 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1573 1574 /** 1575 * Constant for the "Supplemental Arrows-A" Unicode character block. 1576 * @since 1.5 1577 */ 1578 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1579 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1580 "SUPPLEMENTAL ARROWS-A", 1581 "SUPPLEMENTALARROWS-A"); 1582 1583 /** 1584 * Constant for the "Supplemental Arrows-B" Unicode character block. 1585 * @since 1.5 1586 */ 1587 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1588 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1589 "SUPPLEMENTAL ARROWS-B", 1590 "SUPPLEMENTALARROWS-B"); 1591 1592 /** 1593 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1594 * character block. 1595 * @since 1.5 1596 */ 1597 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1598 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1599 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1600 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1601 1602 /** 1603 * Constant for the "Supplemental Mathematical Operators" Unicode 1604 * character block. 1605 * @since 1.5 1606 */ 1607 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1608 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1609 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1610 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1611 1612 /** 1613 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1614 * block. 
1615 * @since 1.5 1616 */ 1617 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1618 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1619 "MISCELLANEOUS SYMBOLS AND ARROWS", 1620 "MISCELLANEOUSSYMBOLSANDARROWS"); 1621 1622 /** 1623 * Constant for the "Katakana Phonetic Extensions" Unicode character 1624 * block. 1625 * @since 1.5 1626 */ 1627 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1628 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1629 "KATAKANA PHONETIC EXTENSIONS", 1630 "KATAKANAPHONETICEXTENSIONS"); 1631 1632 /** 1633 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1634 * @since 1.5 1635 */ 1636 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1637 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1638 "YIJING HEXAGRAM SYMBOLS", 1639 "YIJINGHEXAGRAMSYMBOLS"); 1640 1641 /** 1642 * Constant for the "Variation Selectors" Unicode character block. 1643 * @since 1.5 1644 */ 1645 public static final UnicodeBlock VARIATION_SELECTORS = 1646 new UnicodeBlock("VARIATION_SELECTORS", 1647 "VARIATION SELECTORS", 1648 "VARIATIONSELECTORS"); 1649 1650 /** 1651 * Constant for the "Linear B Syllabary" Unicode character block. 1652 * @since 1.5 1653 */ 1654 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1655 new UnicodeBlock("LINEAR_B_SYLLABARY", 1656 "LINEAR B SYLLABARY", 1657 "LINEARBSYLLABARY"); 1658 1659 /** 1660 * Constant for the "Linear B Ideograms" Unicode character block. 1661 * @since 1.5 1662 */ 1663 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1664 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1665 "LINEAR B IDEOGRAMS", 1666 "LINEARBIDEOGRAMS"); 1667 1668 /** 1669 * Constant for the "Aegean Numbers" Unicode character block. 1670 * @since 1.5 1671 */ 1672 public static final UnicodeBlock AEGEAN_NUMBERS = 1673 new UnicodeBlock("AEGEAN_NUMBERS", 1674 "AEGEAN NUMBERS", 1675 "AEGEANNUMBERS"); 1676 1677 /** 1678 * Constant for the "Old Italic" Unicode character block. 
1679 * @since 1.5 1680 */ 1681 public static final UnicodeBlock OLD_ITALIC = 1682 new UnicodeBlock("OLD_ITALIC", 1683 "OLD ITALIC", 1684 "OLDITALIC"); 1685 1686 /** 1687 * Constant for the "Gothic" Unicode character block. 1688 * @since 1.5 1689 */ 1690 public static final UnicodeBlock GOTHIC = 1691 new UnicodeBlock("GOTHIC"); 1692 1693 /** 1694 * Constant for the "Ugaritic" Unicode character block. 1695 * @since 1.5 1696 */ 1697 public static final UnicodeBlock UGARITIC = 1698 new UnicodeBlock("UGARITIC"); 1699 1700 /** 1701 * Constant for the "Deseret" Unicode character block. 1702 * @since 1.5 1703 */ 1704 public static final UnicodeBlock DESERET = 1705 new UnicodeBlock("DESERET"); 1706 1707 /** 1708 * Constant for the "Shavian" Unicode character block. 1709 * @since 1.5 1710 */ 1711 public static final UnicodeBlock SHAVIAN = 1712 new UnicodeBlock("SHAVIAN"); 1713 1714 /** 1715 * Constant for the "Osmanya" Unicode character block. 1716 * @since 1.5 1717 */ 1718 public static final UnicodeBlock OSMANYA = 1719 new UnicodeBlock("OSMANYA"); 1720 1721 /** 1722 * Constant for the "Cypriot Syllabary" Unicode character block. 1723 * @since 1.5 1724 */ 1725 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1726 new UnicodeBlock("CYPRIOT_SYLLABARY", 1727 "CYPRIOT SYLLABARY", 1728 "CYPRIOTSYLLABARY"); 1729 1730 /** 1731 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1732 * @since 1.5 1733 */ 1734 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1735 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1736 "BYZANTINE MUSICAL SYMBOLS", 1737 "BYZANTINEMUSICALSYMBOLS"); 1738 1739 /** 1740 * Constant for the "Musical Symbols" Unicode character block. 1741 * @since 1.5 1742 */ 1743 public static final UnicodeBlock MUSICAL_SYMBOLS = 1744 new UnicodeBlock("MUSICAL_SYMBOLS", 1745 "MUSICAL SYMBOLS", 1746 "MUSICALSYMBOLS"); 1747 1748 /** 1749 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 
1750 * @since 1.5 1751 */ 1752 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1753 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1754 "TAI XUAN JING SYMBOLS", 1755 "TAIXUANJINGSYMBOLS"); 1756 1757 /** 1758 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1759 * character block. 1760 * @since 1.5 1761 */ 1762 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1763 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1764 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1765 "MATHEMATICALALPHANUMERICSYMBOLS"); 1766 1767 /** 1768 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1769 * character block. 1770 * @since 1.5 1771 */ 1772 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1773 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1774 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1775 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1776 1777 /** 1778 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 1779 * @since 1.5 1780 */ 1781 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1782 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1783 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1784 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1785 1786 /** 1787 * Constant for the "Tags" Unicode character block. 1788 * @since 1.5 1789 */ 1790 public static final UnicodeBlock TAGS = 1791 new UnicodeBlock("TAGS"); 1792 1793 /** 1794 * Constant for the "Variation Selectors Supplement" Unicode character 1795 * block. 1796 * @since 1.5 1797 */ 1798 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1799 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1800 "VARIATION SELECTORS SUPPLEMENT", 1801 "VARIATIONSELECTORSSUPPLEMENT"); 1802 1803 /** 1804 * Constant for the "Supplementary Private Use Area-A" Unicode character 1805 * block. 
1806 * @since 1.5 1807 */ 1808 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1809 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1810 "SUPPLEMENTARY PRIVATE USE AREA-A", 1811 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1812 1813 /** 1814 * Constant for the "Supplementary Private Use Area-B" Unicode character 1815 * block. 1816 * @since 1.5 1817 */ 1818 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1819 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1820 "SUPPLEMENTARY PRIVATE USE AREA-B", 1821 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1822 1823 /** 1824 * Constant for the "High Surrogates" Unicode character block. 1825 * This block represents codepoint values in the high surrogate 1826 * range: U+D800 through U+DB7F 1827 * 1828 * @since 1.5 1829 */ 1830 public static final UnicodeBlock HIGH_SURROGATES = 1831 new UnicodeBlock("HIGH_SURROGATES", 1832 "HIGH SURROGATES", 1833 "HIGHSURROGATES"); 1834 1835 /** 1836 * Constant for the "High Private Use Surrogates" Unicode character 1837 * block. 1838 * This block represents codepoint values in the private use high 1839 * surrogate range: U+DB80 through U+DBFF 1840 * 1841 * @since 1.5 1842 */ 1843 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1844 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1845 "HIGH PRIVATE USE SURROGATES", 1846 "HIGHPRIVATEUSESURROGATES"); 1847 1848 /** 1849 * Constant for the "Low Surrogates" Unicode character block. 1850 * This block represents codepoint values in the low surrogate 1851 * range: U+DC00 through U+DFFF 1852 * 1853 * @since 1.5 1854 */ 1855 public static final UnicodeBlock LOW_SURROGATES = 1856 new UnicodeBlock("LOW_SURROGATES", 1857 "LOW SURROGATES", 1858 "LOWSURROGATES"); 1859 1860 /** 1861 * Constant for the "Arabic Supplement" Unicode character block. 
1862 * @since 1.7 1863 */ 1864 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1865 new UnicodeBlock("ARABIC_SUPPLEMENT", 1866 "ARABIC SUPPLEMENT", 1867 "ARABICSUPPLEMENT"); 1868 1869 /** 1870 * Constant for the "NKo" Unicode character block. 1871 * @since 1.7 1872 */ 1873 public static final UnicodeBlock NKO = 1874 new UnicodeBlock("NKO"); 1875 1876 /** 1877 * Constant for the "Samaritan" Unicode character block. 1878 * @since 1.7 1879 */ 1880 public static final UnicodeBlock SAMARITAN = 1881 new UnicodeBlock("SAMARITAN"); 1882 1883 /** 1884 * Constant for the "Mandaic" Unicode character block. 1885 * @since 1.7 1886 */ 1887 public static final UnicodeBlock MANDAIC = 1888 new UnicodeBlock("MANDAIC"); 1889 1890 /** 1891 * Constant for the "Ethiopic Supplement" Unicode character block. 1892 * @since 1.7 1893 */ 1894 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1895 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1896 "ETHIOPIC SUPPLEMENT", 1897 "ETHIOPICSUPPLEMENT"); 1898 1899 /** 1900 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1901 * Unicode character block. 1902 * @since 1.7 1903 */ 1904 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1905 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1906 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1907 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1908 1909 /** 1910 * Constant for the "New Tai Lue" Unicode character block. 1911 * @since 1.7 1912 */ 1913 public static final UnicodeBlock NEW_TAI_LUE = 1914 new UnicodeBlock("NEW_TAI_LUE", 1915 "NEW TAI LUE", 1916 "NEWTAILUE"); 1917 1918 /** 1919 * Constant for the "Buginese" Unicode character block. 1920 * @since 1.7 1921 */ 1922 public static final UnicodeBlock BUGINESE = 1923 new UnicodeBlock("BUGINESE"); 1924 1925 /** 1926 * Constant for the "Tai Tham" Unicode character block. 
1927 * @since 1.7 1928 */ 1929 public static final UnicodeBlock TAI_THAM = 1930 new UnicodeBlock("TAI_THAM", 1931 "TAI THAM", 1932 "TAITHAM"); 1933 1934 /** 1935 * Constant for the "Balinese" Unicode character block. 1936 * @since 1.7 1937 */ 1938 public static final UnicodeBlock BALINESE = 1939 new UnicodeBlock("BALINESE"); 1940 1941 /** 1942 * Constant for the "Sundanese" Unicode character block. 1943 * @since 1.7 1944 */ 1945 public static final UnicodeBlock SUNDANESE = 1946 new UnicodeBlock("SUNDANESE"); 1947 1948 /** 1949 * Constant for the "Batak" Unicode character block. 1950 * @since 1.7 1951 */ 1952 public static final UnicodeBlock BATAK = 1953 new UnicodeBlock("BATAK"); 1954 1955 /** 1956 * Constant for the "Lepcha" Unicode character block. 1957 * @since 1.7 1958 */ 1959 public static final UnicodeBlock LEPCHA = 1960 new UnicodeBlock("LEPCHA"); 1961 1962 /** 1963 * Constant for the "Ol Chiki" Unicode character block. 1964 * @since 1.7 1965 */ 1966 public static final UnicodeBlock OL_CHIKI = 1967 new UnicodeBlock("OL_CHIKI", 1968 "OL CHIKI", 1969 "OLCHIKI"); 1970 1971 /** 1972 * Constant for the "Vedic Extensions" Unicode character block. 1973 * @since 1.7 1974 */ 1975 public static final UnicodeBlock VEDIC_EXTENSIONS = 1976 new UnicodeBlock("VEDIC_EXTENSIONS", 1977 "VEDIC EXTENSIONS", 1978 "VEDICEXTENSIONS"); 1979 1980 /** 1981 * Constant for the "Phonetic Extensions Supplement" Unicode character 1982 * block. 1983 * @since 1.7 1984 */ 1985 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1986 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1987 "PHONETIC EXTENSIONS SUPPLEMENT", 1988 "PHONETICEXTENSIONSSUPPLEMENT"); 1989 1990 /** 1991 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1992 * character block. 
1993 * @since 1.7 1994 */ 1995 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1996 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1997 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 1998 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 1999 2000 /** 2001 * Constant for the "Glagolitic" Unicode character block. 2002 * @since 1.7 2003 */ 2004 public static final UnicodeBlock GLAGOLITIC = 2005 new UnicodeBlock("GLAGOLITIC"); 2006 2007 /** 2008 * Constant for the "Latin Extended-C" Unicode character block. 2009 * @since 1.7 2010 */ 2011 public static final UnicodeBlock LATIN_EXTENDED_C = 2012 new UnicodeBlock("LATIN_EXTENDED_C", 2013 "LATIN EXTENDED-C", 2014 "LATINEXTENDED-C"); 2015 2016 /** 2017 * Constant for the "Coptic" Unicode character block. 2018 * @since 1.7 2019 */ 2020 public static final UnicodeBlock COPTIC = 2021 new UnicodeBlock("COPTIC"); 2022 2023 /** 2024 * Constant for the "Georgian Supplement" Unicode character block. 2025 * @since 1.7 2026 */ 2027 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 2028 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 2029 "GEORGIAN SUPPLEMENT", 2030 "GEORGIANSUPPLEMENT"); 2031 2032 /** 2033 * Constant for the "Tifinagh" Unicode character block. 2034 * @since 1.7 2035 */ 2036 public static final UnicodeBlock TIFINAGH = 2037 new UnicodeBlock("TIFINAGH"); 2038 2039 /** 2040 * Constant for the "Ethiopic Extended" Unicode character block. 2041 * @since 1.7 2042 */ 2043 public static final UnicodeBlock ETHIOPIC_EXTENDED = 2044 new UnicodeBlock("ETHIOPIC_EXTENDED", 2045 "ETHIOPIC EXTENDED", 2046 "ETHIOPICEXTENDED"); 2047 2048 /** 2049 * Constant for the "Cyrillic Extended-A" Unicode character block. 2050 * @since 1.7 2051 */ 2052 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2053 new UnicodeBlock("CYRILLIC_EXTENDED_A", 2054 "CYRILLIC EXTENDED-A", 2055 "CYRILLICEXTENDED-A"); 2056 2057 /** 2058 * Constant for the "Supplemental Punctuation" Unicode character block. 
2059 * @since 1.7 2060 */ 2061 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2062 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 2063 "SUPPLEMENTAL PUNCTUATION", 2064 "SUPPLEMENTALPUNCTUATION"); 2065 2066 /** 2067 * Constant for the "CJK Strokes" Unicode character block. 2068 * @since 1.7 2069 */ 2070 public static final UnicodeBlock CJK_STROKES = 2071 new UnicodeBlock("CJK_STROKES", 2072 "CJK STROKES", 2073 "CJKSTROKES"); 2074 2075 /** 2076 * Constant for the "Lisu" Unicode character block. 2077 * @since 1.7 2078 */ 2079 public static final UnicodeBlock LISU = 2080 new UnicodeBlock("LISU"); 2081 2082 /** 2083 * Constant for the "Vai" Unicode character block. 2084 * @since 1.7 2085 */ 2086 public static final UnicodeBlock VAI = 2087 new UnicodeBlock("VAI"); 2088 2089 /** 2090 * Constant for the "Cyrillic Extended-B" Unicode character block. 2091 * @since 1.7 2092 */ 2093 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2094 new UnicodeBlock("CYRILLIC_EXTENDED_B", 2095 "CYRILLIC EXTENDED-B", 2096 "CYRILLICEXTENDED-B"); 2097 2098 /** 2099 * Constant for the "Bamum" Unicode character block. 2100 * @since 1.7 2101 */ 2102 public static final UnicodeBlock BAMUM = 2103 new UnicodeBlock("BAMUM"); 2104 2105 /** 2106 * Constant for the "Modifier Tone Letters" Unicode character block. 2107 * @since 1.7 2108 */ 2109 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2110 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2111 "MODIFIER TONE LETTERS", 2112 "MODIFIERTONELETTERS"); 2113 2114 /** 2115 * Constant for the "Latin Extended-D" Unicode character block. 2116 * @since 1.7 2117 */ 2118 public static final UnicodeBlock LATIN_EXTENDED_D = 2119 new UnicodeBlock("LATIN_EXTENDED_D", 2120 "LATIN EXTENDED-D", 2121 "LATINEXTENDED-D"); 2122 2123 /** 2124 * Constant for the "Syloti Nagri" Unicode character block. 
2125 * @since 1.7 2126 */ 2127 public static final UnicodeBlock SYLOTI_NAGRI = 2128 new UnicodeBlock("SYLOTI_NAGRI", 2129 "SYLOTI NAGRI", 2130 "SYLOTINAGRI"); 2131 2132 /** 2133 * Constant for the "Common Indic Number Forms" Unicode character block. 2134 * @since 1.7 2135 */ 2136 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2137 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2138 "COMMON INDIC NUMBER FORMS", 2139 "COMMONINDICNUMBERFORMS"); 2140 2141 /** 2142 * Constant for the "Phags-pa" Unicode character block. 2143 * @since 1.7 2144 */ 2145 public static final UnicodeBlock PHAGS_PA = 2146 new UnicodeBlock("PHAGS_PA", 2147 "PHAGS-PA"); 2148 2149 /** 2150 * Constant for the "Saurashtra" Unicode character block. 2151 * @since 1.7 2152 */ 2153 public static final UnicodeBlock SAURASHTRA = 2154 new UnicodeBlock("SAURASHTRA"); 2155 2156 /** 2157 * Constant for the "Devanagari Extended" Unicode character block. 2158 * @since 1.7 2159 */ 2160 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2161 new UnicodeBlock("DEVANAGARI_EXTENDED", 2162 "DEVANAGARI EXTENDED", 2163 "DEVANAGARIEXTENDED"); 2164 2165 /** 2166 * Constant for the "Kayah Li" Unicode character block. 2167 * @since 1.7 2168 */ 2169 public static final UnicodeBlock KAYAH_LI = 2170 new UnicodeBlock("KAYAH_LI", 2171 "KAYAH LI", 2172 "KAYAHLI"); 2173 2174 /** 2175 * Constant for the "Rejang" Unicode character block. 2176 * @since 1.7 2177 */ 2178 public static final UnicodeBlock REJANG = 2179 new UnicodeBlock("REJANG"); 2180 2181 /** 2182 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2183 * @since 1.7 2184 */ 2185 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2186 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2187 "HANGUL JAMO EXTENDED-A", 2188 "HANGULJAMOEXTENDED-A"); 2189 2190 /** 2191 * Constant for the "Javanese" Unicode character block. 
2192 * @since 1.7 2193 */ 2194 public static final UnicodeBlock JAVANESE = 2195 new UnicodeBlock("JAVANESE"); 2196 2197 /** 2198 * Constant for the "Cham" Unicode character block. 2199 * @since 1.7 2200 */ 2201 public static final UnicodeBlock CHAM = 2202 new UnicodeBlock("CHAM"); 2203 2204 /** 2205 * Constant for the "Myanmar Extended-A" Unicode character block. 2206 * @since 1.7 2207 */ 2208 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2209 new UnicodeBlock("MYANMAR_EXTENDED_A", 2210 "MYANMAR EXTENDED-A", 2211 "MYANMAREXTENDED-A"); 2212 2213 /** 2214 * Constant for the "Tai Viet" Unicode character block. 2215 * @since 1.7 2216 */ 2217 public static final UnicodeBlock TAI_VIET = 2218 new UnicodeBlock("TAI_VIET", 2219 "TAI VIET", 2220 "TAIVIET"); 2221 2222 /** 2223 * Constant for the "Ethiopic Extended-A" Unicode character block. 2224 * @since 1.7 2225 */ 2226 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2227 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2228 "ETHIOPIC EXTENDED-A", 2229 "ETHIOPICEXTENDED-A"); 2230 2231 /** 2232 * Constant for the "Meetei Mayek" Unicode character block. 2233 * @since 1.7 2234 */ 2235 public static final UnicodeBlock MEETEI_MAYEK = 2236 new UnicodeBlock("MEETEI_MAYEK", 2237 "MEETEI MAYEK", 2238 "MEETEIMAYEK"); 2239 2240 /** 2241 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2242 * @since 1.7 2243 */ 2244 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2245 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2246 "HANGUL JAMO EXTENDED-B", 2247 "HANGULJAMOEXTENDED-B"); 2248 2249 /** 2250 * Constant for the "Vertical Forms" Unicode character block. 2251 * @since 1.7 2252 */ 2253 public static final UnicodeBlock VERTICAL_FORMS = 2254 new UnicodeBlock("VERTICAL_FORMS", 2255 "VERTICAL FORMS", 2256 "VERTICALFORMS"); 2257 2258 /** 2259 * Constant for the "Ancient Greek Numbers" Unicode character block. 
2260 * @since 1.7 2261 */ 2262 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2263 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2264 "ANCIENT GREEK NUMBERS", 2265 "ANCIENTGREEKNUMBERS"); 2266 2267 /** 2268 * Constant for the "Ancient Symbols" Unicode character block. 2269 * @since 1.7 2270 */ 2271 public static final UnicodeBlock ANCIENT_SYMBOLS = 2272 new UnicodeBlock("ANCIENT_SYMBOLS", 2273 "ANCIENT SYMBOLS", 2274 "ANCIENTSYMBOLS"); 2275 2276 /** 2277 * Constant for the "Phaistos Disc" Unicode character block. 2278 * @since 1.7 2279 */ 2280 public static final UnicodeBlock PHAISTOS_DISC = 2281 new UnicodeBlock("PHAISTOS_DISC", 2282 "PHAISTOS DISC", 2283 "PHAISTOSDISC"); 2284 2285 /** 2286 * Constant for the "Lycian" Unicode character block. 2287 * @since 1.7 2288 */ 2289 public static final UnicodeBlock LYCIAN = 2290 new UnicodeBlock("LYCIAN"); 2291 2292 /** 2293 * Constant for the "Carian" Unicode character block. 2294 * @since 1.7 2295 */ 2296 public static final UnicodeBlock CARIAN = 2297 new UnicodeBlock("CARIAN"); 2298 2299 /** 2300 * Constant for the "Old Persian" Unicode character block. 2301 * @since 1.7 2302 */ 2303 public static final UnicodeBlock OLD_PERSIAN = 2304 new UnicodeBlock("OLD_PERSIAN", 2305 "OLD PERSIAN", 2306 "OLDPERSIAN"); 2307 2308 /** 2309 * Constant for the "Imperial Aramaic" Unicode character block. 2310 * @since 1.7 2311 */ 2312 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2313 new UnicodeBlock("IMPERIAL_ARAMAIC", 2314 "IMPERIAL ARAMAIC", 2315 "IMPERIALARAMAIC"); 2316 2317 /** 2318 * Constant for the "Phoenician" Unicode character block. 2319 * @since 1.7 2320 */ 2321 public static final UnicodeBlock PHOENICIAN = 2322 new UnicodeBlock("PHOENICIAN"); 2323 2324 /** 2325 * Constant for the "Lydian" Unicode character block. 2326 * @since 1.7 2327 */ 2328 public static final UnicodeBlock LYDIAN = 2329 new UnicodeBlock("LYDIAN"); 2330 2331 /** 2332 * Constant for the "Kharoshthi" Unicode character block. 
2333 * @since 1.7 2334 */ 2335 public static final UnicodeBlock KHAROSHTHI = 2336 new UnicodeBlock("KHAROSHTHI"); 2337 2338 /** 2339 * Constant for the "Old South Arabian" Unicode character block. 2340 * @since 1.7 2341 */ 2342 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2343 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2344 "OLD SOUTH ARABIAN", 2345 "OLDSOUTHARABIAN"); 2346 2347 /** 2348 * Constant for the "Avestan" Unicode character block. 2349 * @since 1.7 2350 */ 2351 public static final UnicodeBlock AVESTAN = 2352 new UnicodeBlock("AVESTAN"); 2353 2354 /** 2355 * Constant for the "Inscriptional Parthian" Unicode character block. 2356 * @since 1.7 2357 */ 2358 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2359 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2360 "INSCRIPTIONAL PARTHIAN", 2361 "INSCRIPTIONALPARTHIAN"); 2362 2363 /** 2364 * Constant for the "Inscriptional Pahlavi" Unicode character block. 2365 * @since 1.7 2366 */ 2367 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2368 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2369 "INSCRIPTIONAL PAHLAVI", 2370 "INSCRIPTIONALPAHLAVI"); 2371 2372 /** 2373 * Constant for the "Old Turkic" Unicode character block. 2374 * @since 1.7 2375 */ 2376 public static final UnicodeBlock OLD_TURKIC = 2377 new UnicodeBlock("OLD_TURKIC", 2378 "OLD TURKIC", 2379 "OLDTURKIC"); 2380 2381 /** 2382 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2383 * @since 1.7 2384 */ 2385 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2386 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2387 "RUMI NUMERAL SYMBOLS", 2388 "RUMINUMERALSYMBOLS"); 2389 2390 /** 2391 * Constant for the "Brahmi" Unicode character block. 2392 * @since 1.7 2393 */ 2394 public static final UnicodeBlock BRAHMI = 2395 new UnicodeBlock("BRAHMI"); 2396 2397 /** 2398 * Constant for the "Kaithi" Unicode character block. 
2399 * @since 1.7 2400 */ 2401 public static final UnicodeBlock KAITHI = 2402 new UnicodeBlock("KAITHI"); 2403 2404 /** 2405 * Constant for the "Cuneiform" Unicode character block. 2406 * @since 1.7 2407 */ 2408 public static final UnicodeBlock CUNEIFORM = 2409 new UnicodeBlock("CUNEIFORM"); 2410 2411 /** 2412 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2413 * character block. 2414 * @since 1.7 2415 */ 2416 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2417 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2418 "CUNEIFORM NUMBERS AND PUNCTUATION", 2419 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2420 2421 /** 2422 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2423 * @since 1.7 2424 */ 2425 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2426 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2427 "EGYPTIAN HIEROGLYPHS", 2428 "EGYPTIANHIEROGLYPHS"); 2429 2430 /** 2431 * Constant for the "Bamum Supplement" Unicode character block. 2432 * @since 1.7 2433 */ 2434 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2435 new UnicodeBlock("BAMUM_SUPPLEMENT", 2436 "BAMUM SUPPLEMENT", 2437 "BAMUMSUPPLEMENT"); 2438 2439 /** 2440 * Constant for the "Kana Supplement" Unicode character block. 2441 * @since 1.7 2442 */ 2443 public static final UnicodeBlock KANA_SUPPLEMENT = 2444 new UnicodeBlock("KANA_SUPPLEMENT", 2445 "KANA SUPPLEMENT", 2446 "KANASUPPLEMENT"); 2447 2448 /** 2449 * Constant for the "Ancient Greek Musical Notation" Unicode character 2450 * block. 2451 * @since 1.7 2452 */ 2453 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2454 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2455 "ANCIENT GREEK MUSICAL NOTATION", 2456 "ANCIENTGREEKMUSICALNOTATION"); 2457 2458 /** 2459 * Constant for the "Counting Rod Numerals" Unicode character block. 
2460 * @since 1.7 2461 */ 2462 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2463 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2464 "COUNTING ROD NUMERALS", 2465 "COUNTINGRODNUMERALS"); 2466 2467 /** 2468 * Constant for the "Mahjong Tiles" Unicode character block. 2469 * @since 1.7 2470 */ 2471 public static final UnicodeBlock MAHJONG_TILES = 2472 new UnicodeBlock("MAHJONG_TILES", 2473 "MAHJONG TILES", 2474 "MAHJONGTILES"); 2475 2476 /** 2477 * Constant for the "Domino Tiles" Unicode character block. 2478 * @since 1.7 2479 */ 2480 public static final UnicodeBlock DOMINO_TILES = 2481 new UnicodeBlock("DOMINO_TILES", 2482 "DOMINO TILES", 2483 "DOMINOTILES"); 2484 2485 /** 2486 * Constant for the "Playing Cards" Unicode character block. 2487 * @since 1.7 2488 */ 2489 public static final UnicodeBlock PLAYING_CARDS = 2490 new UnicodeBlock("PLAYING_CARDS", 2491 "PLAYING CARDS", 2492 "PLAYINGCARDS"); 2493 2494 /** 2495 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2496 * block. 2497 * @since 1.7 2498 */ 2499 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2500 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2501 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2502 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2503 2504 /** 2505 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2506 * block. 2507 * @since 1.7 2508 */ 2509 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2510 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2511 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2512 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2513 2514 /** 2515 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode 2516 * character block. 
2517 * @since 1.7 2518 */ 2519 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2520 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2521 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2522 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2523 2524 /** 2525 * Constant for the "Emoticons" Unicode character block. 2526 * @since 1.7 2527 */ 2528 public static final UnicodeBlock EMOTICONS = 2529 new UnicodeBlock("EMOTICONS"); 2530 2531 /** 2532 * Constant for the "Transport And Map Symbols" Unicode character block. 2533 * @since 1.7 2534 */ 2535 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2536 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2537 "TRANSPORT AND MAP SYMBOLS", 2538 "TRANSPORTANDMAPSYMBOLS"); 2539 2540 /** 2541 * Constant for the "Alchemical Symbols" Unicode character block. 2542 * @since 1.7 2543 */ 2544 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2545 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2546 "ALCHEMICAL SYMBOLS", 2547 "ALCHEMICALSYMBOLS"); 2548 2549 /** 2550 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2551 * character block. 2552 * @since 1.7 2553 */ 2554 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2555 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2556 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2557 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2558 2559 /** 2560 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2561 * character block. 2562 * @since 1.7 2563 */ 2564 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2565 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2566 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2567 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2568 2569 /** 2570 * Constant for the "Arabic Extended-A" Unicode character block. 
2571 * @since 1.8 2572 */ 2573 public static final UnicodeBlock ARABIC_EXTENDED_A = 2574 new UnicodeBlock("ARABIC_EXTENDED_A", 2575 "ARABIC EXTENDED-A", 2576 "ARABICEXTENDED-A"); 2577 2578 /** 2579 * Constant for the "Sundanese Supplement" Unicode character block. 2580 * @since 1.8 2581 */ 2582 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2583 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2584 "SUNDANESE SUPPLEMENT", 2585 "SUNDANESESUPPLEMENT"); 2586 2587 /** 2588 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2589 * @since 1.8 2590 */ 2591 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2592 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2593 "MEETEI MAYEK EXTENSIONS", 2594 "MEETEIMAYEKEXTENSIONS"); 2595 2596 /** 2597 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2598 * @since 1.8 2599 */ 2600 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2601 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2602 "MEROITIC HIEROGLYPHS", 2603 "MEROITICHIEROGLYPHS"); 2604 2605 /** 2606 * Constant for the "Meroitic Cursive" Unicode character block. 2607 * @since 1.8 2608 */ 2609 public static final UnicodeBlock MEROITIC_CURSIVE = 2610 new UnicodeBlock("MEROITIC_CURSIVE", 2611 "MEROITIC CURSIVE", 2612 "MEROITICCURSIVE"); 2613 2614 /** 2615 * Constant for the "Sora Sompeng" Unicode character block. 2616 * @since 1.8 2617 */ 2618 public static final UnicodeBlock SORA_SOMPENG = 2619 new UnicodeBlock("SORA_SOMPENG", 2620 "SORA SOMPENG", 2621 "SORASOMPENG"); 2622 2623 /** 2624 * Constant for the "Chakma" Unicode character block. 2625 * @since 1.8 2626 */ 2627 public static final UnicodeBlock CHAKMA = 2628 new UnicodeBlock("CHAKMA"); 2629 2630 /** 2631 * Constant for the "Sharada" Unicode character block. 2632 * @since 1.8 2633 */ 2634 public static final UnicodeBlock SHARADA = 2635 new UnicodeBlock("SHARADA"); 2636 2637 /** 2638 * Constant for the "Takri" Unicode character block. 
2639 * @since 1.8 2640 */ 2641 public static final UnicodeBlock TAKRI = 2642 new UnicodeBlock("TAKRI"); 2643 2644 /** 2645 * Constant for the "Miao" Unicode character block. 2646 * @since 1.8 2647 */ 2648 public static final UnicodeBlock MIAO = 2649 new UnicodeBlock("MIAO"); 2650 2651 /** 2652 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2653 * character block. 2654 * @since 1.8 2655 */ 2656 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2657 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2658 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2659 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2660 2661 /** 2662 * Constant for the "Combining Diacritical Marks Extended" Unicode 2663 * character block. 2664 * @since 9 2665 */ 2666 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2667 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", 2668 "COMBINING DIACRITICAL MARKS EXTENDED", 2669 "COMBININGDIACRITICALMARKSEXTENDED"); 2670 2671 /** 2672 * Constant for the "Myanmar Extended-B" Unicode character block. 2673 * @since 9 2674 */ 2675 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2676 new UnicodeBlock("MYANMAR_EXTENDED_B", 2677 "MYANMAR EXTENDED-B", 2678 "MYANMAREXTENDED-B"); 2679 2680 /** 2681 * Constant for the "Latin Extended-E" Unicode character block. 2682 * @since 9 2683 */ 2684 public static final UnicodeBlock LATIN_EXTENDED_E = 2685 new UnicodeBlock("LATIN_EXTENDED_E", 2686 "LATIN EXTENDED-E", 2687 "LATINEXTENDED-E"); 2688 2689 /** 2690 * Constant for the "Coptic Epact Numbers" Unicode character block. 2691 * @since 9 2692 */ 2693 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2694 new UnicodeBlock("COPTIC_EPACT_NUMBERS", 2695 "COPTIC EPACT NUMBERS", 2696 "COPTICEPACTNUMBERS"); 2697 2698 /** 2699 * Constant for the "Old Permic" Unicode character block. 
2700 * @since 9 2701 */ 2702 public static final UnicodeBlock OLD_PERMIC = 2703 new UnicodeBlock("OLD_PERMIC", 2704 "OLD PERMIC", 2705 "OLDPERMIC"); 2706 2707 /** 2708 * Constant for the "Elbasan" Unicode character block. 2709 * @since 9 2710 */ 2711 public static final UnicodeBlock ELBASAN = 2712 new UnicodeBlock("ELBASAN"); 2713 2714 /** 2715 * Constant for the "Caucasian Albanian" Unicode character block. 2716 * @since 9 2717 */ 2718 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2719 new UnicodeBlock("CAUCASIAN_ALBANIAN", 2720 "CAUCASIAN ALBANIAN", 2721 "CAUCASIANALBANIAN"); 2722 2723 /** 2724 * Constant for the "Linear A" Unicode character block. 2725 * @since 9 2726 */ 2727 public static final UnicodeBlock LINEAR_A = 2728 new UnicodeBlock("LINEAR_A", 2729 "LINEAR A", 2730 "LINEARA"); 2731 2732 /** 2733 * Constant for the "Palmyrene" Unicode character block. 2734 * @since 9 2735 */ 2736 public static final UnicodeBlock PALMYRENE = 2737 new UnicodeBlock("PALMYRENE"); 2738 2739 /** 2740 * Constant for the "Nabataean" Unicode character block. 2741 * @since 9 2742 */ 2743 public static final UnicodeBlock NABATAEAN = 2744 new UnicodeBlock("NABATAEAN"); 2745 2746 /** 2747 * Constant for the "Old North Arabian" Unicode character block. 2748 * @since 9 2749 */ 2750 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2751 new UnicodeBlock("OLD_NORTH_ARABIAN", 2752 "OLD NORTH ARABIAN", 2753 "OLDNORTHARABIAN"); 2754 2755 /** 2756 * Constant for the "Manichaean" Unicode character block. 2757 * @since 9 2758 */ 2759 public static final UnicodeBlock MANICHAEAN = 2760 new UnicodeBlock("MANICHAEAN"); 2761 2762 /** 2763 * Constant for the "Psalter Pahlavi" Unicode character block. 2764 * @since 9 2765 */ 2766 public static final UnicodeBlock PSALTER_PAHLAVI = 2767 new UnicodeBlock("PSALTER_PAHLAVI", 2768 "PSALTER PAHLAVI", 2769 "PSALTERPAHLAVI"); 2770 2771 /** 2772 * Constant for the "Mahajani" Unicode character block. 
2773 * @since 9 2774 */ 2775 public static final UnicodeBlock MAHAJANI = 2776 new UnicodeBlock("MAHAJANI"); 2777 2778 /** 2779 * Constant for the "Sinhala Archaic Numbers" Unicode character block. 2780 * @since 9 2781 */ 2782 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2783 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", 2784 "SINHALA ARCHAIC NUMBERS", 2785 "SINHALAARCHAICNUMBERS"); 2786 2787 /** 2788 * Constant for the "Khojki" Unicode character block. 2789 * @since 9 2790 */ 2791 public static final UnicodeBlock KHOJKI = 2792 new UnicodeBlock("KHOJKI"); 2793 2794 /** 2795 * Constant for the "Khudawadi" Unicode character block. 2796 * @since 9 2797 */ 2798 public static final UnicodeBlock KHUDAWADI = 2799 new UnicodeBlock("KHUDAWADI"); 2800 2801 /** 2802 * Constant for the "Grantha" Unicode character block. 2803 * @since 9 2804 */ 2805 public static final UnicodeBlock GRANTHA = 2806 new UnicodeBlock("GRANTHA"); 2807 2808 /** 2809 * Constant for the "Tirhuta" Unicode character block. 2810 * @since 9 2811 */ 2812 public static final UnicodeBlock TIRHUTA = 2813 new UnicodeBlock("TIRHUTA"); 2814 2815 /** 2816 * Constant for the "Siddham" Unicode character block. 2817 * @since 9 2818 */ 2819 public static final UnicodeBlock SIDDHAM = 2820 new UnicodeBlock("SIDDHAM"); 2821 2822 /** 2823 * Constant for the "Modi" Unicode character block. 2824 * @since 9 2825 */ 2826 public static final UnicodeBlock MODI = 2827 new UnicodeBlock("MODI"); 2828 2829 /** 2830 * Constant for the "Warang Citi" Unicode character block. 2831 * @since 9 2832 */ 2833 public static final UnicodeBlock WARANG_CITI = 2834 new UnicodeBlock("WARANG_CITI", 2835 "WARANG CITI", 2836 "WARANGCITI"); 2837 2838 /** 2839 * Constant for the "Pau Cin Hau" Unicode character block. 2840 * @since 9 2841 */ 2842 public static final UnicodeBlock PAU_CIN_HAU = 2843 new UnicodeBlock("PAU_CIN_HAU", 2844 "PAU CIN HAU", 2845 "PAUCINHAU"); 2846 2847 /** 2848 * Constant for the "Mro" Unicode character block. 
2849 * @since 9 2850 */ 2851 public static final UnicodeBlock MRO = 2852 new UnicodeBlock("MRO"); 2853 2854 /** 2855 * Constant for the "Bassa Vah" Unicode character block. 2856 * @since 9 2857 */ 2858 public static final UnicodeBlock BASSA_VAH = 2859 new UnicodeBlock("BASSA_VAH", 2860 "BASSA VAH", 2861 "BASSAVAH"); 2862 2863 /** 2864 * Constant for the "Pahawh Hmong" Unicode character block. 2865 * @since 9 2866 */ 2867 public static final UnicodeBlock PAHAWH_HMONG = 2868 new UnicodeBlock("PAHAWH_HMONG", 2869 "PAHAWH HMONG", 2870 "PAHAWHHMONG"); 2871 2872 /** 2873 * Constant for the "Duployan" Unicode character block. 2874 * @since 9 2875 */ 2876 public static final UnicodeBlock DUPLOYAN = 2877 new UnicodeBlock("DUPLOYAN"); 2878 2879 /** 2880 * Constant for the "Shorthand Format Controls" Unicode character block. 2881 * @since 9 2882 */ 2883 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2884 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", 2885 "SHORTHAND FORMAT CONTROLS", 2886 "SHORTHANDFORMATCONTROLS"); 2887 2888 /** 2889 * Constant for the "Mende Kikakui" Unicode character block. 2890 * @since 9 2891 */ 2892 public static final UnicodeBlock MENDE_KIKAKUI = 2893 new UnicodeBlock("MENDE_KIKAKUI", 2894 "MENDE KIKAKUI", 2895 "MENDEKIKAKUI"); 2896 2897 /** 2898 * Constant for the "Ornamental Dingbats" Unicode character block. 2899 * @since 9 2900 */ 2901 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2902 new UnicodeBlock("ORNAMENTAL_DINGBATS", 2903 "ORNAMENTAL DINGBATS", 2904 "ORNAMENTALDINGBATS"); 2905 2906 /** 2907 * Constant for the "Geometric Shapes Extended" Unicode character block. 2908 * @since 9 2909 */ 2910 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2911 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", 2912 "GEOMETRIC SHAPES EXTENDED", 2913 "GEOMETRICSHAPESEXTENDED"); 2914 2915 /** 2916 * Constant for the "Supplemental Arrows-C" Unicode character block. 
2917 * @since 9 2918 */ 2919 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2920 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 2921 "SUPPLEMENTAL ARROWS-C", 2922 "SUPPLEMENTALARROWS-C"); 2923 2924 /** 2925 * Constant for the "Cherokee Supplement" Unicode character block. 2926 * @since 9 2927 */ 2928 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2929 new UnicodeBlock("CHEROKEE_SUPPLEMENT", 2930 "CHEROKEE SUPPLEMENT", 2931 "CHEROKEESUPPLEMENT"); 2932 2933 /** 2934 * Constant for the "Hatran" Unicode character block. 2935 * @since 9 2936 */ 2937 public static final UnicodeBlock HATRAN = 2938 new UnicodeBlock("HATRAN"); 2939 2940 /** 2941 * Constant for the "Old Hungarian" Unicode character block. 2942 * @since 9 2943 */ 2944 public static final UnicodeBlock OLD_HUNGARIAN = 2945 new UnicodeBlock("OLD_HUNGARIAN", 2946 "OLD HUNGARIAN", 2947 "OLDHUNGARIAN"); 2948 2949 /** 2950 * Constant for the "Multani" Unicode character block. 2951 * @since 9 2952 */ 2953 public static final UnicodeBlock MULTANI = 2954 new UnicodeBlock("MULTANI"); 2955 2956 /** 2957 * Constant for the "Ahom" Unicode character block. 2958 * @since 9 2959 */ 2960 public static final UnicodeBlock AHOM = 2961 new UnicodeBlock("AHOM"); 2962 2963 /** 2964 * Constant for the "Early Dynastic Cuneiform" Unicode character block. 2965 * @since 9 2966 */ 2967 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2968 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", 2969 "EARLY DYNASTIC CUNEIFORM", 2970 "EARLYDYNASTICCUNEIFORM"); 2971 2972 /** 2973 * Constant for the "Anatolian Hieroglyphs" Unicode character block. 2974 * @since 9 2975 */ 2976 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2977 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", 2978 "ANATOLIAN HIEROGLYPHS", 2979 "ANATOLIANHIEROGLYPHS"); 2980 2981 /** 2982 * Constant for the "Sutton SignWriting" Unicode character block. 
2983 * @since 9 2984 */ 2985 public static final UnicodeBlock SUTTON_SIGNWRITING = 2986 new UnicodeBlock("SUTTON_SIGNWRITING", 2987 "SUTTON SIGNWRITING", 2988 "SUTTONSIGNWRITING"); 2989 2990 /** 2991 * Constant for the "Supplemental Symbols and Pictographs" Unicode 2992 * character block. 2993 * @since 9 2994 */ 2995 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2996 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2997 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS", 2998 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS"); 2999 3000 /** 3001 * Constant for the "CJK Unified Ideographs Extension E" Unicode 3002 * character block. 3003 * @since 9 3004 */ 3005 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 3006 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 3007 "CJK UNIFIED IDEOGRAPHS EXTENSION E", 3008 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE"); 3009 3010 /** 3011 * Constant for the "Syriac Supplement" Unicode 3012 * character block. 3013 * @since 11 3014 */ 3015 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 3016 new UnicodeBlock("SYRIAC_SUPPLEMENT", 3017 "SYRIAC SUPPLEMENT", 3018 "SYRIACSUPPLEMENT"); 3019 3020 /** 3021 * Constant for the "Cyrillic Extended-C" Unicode 3022 * character block. 3023 * @since 11 3024 */ 3025 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 3026 new UnicodeBlock("CYRILLIC_EXTENDED_C", 3027 "CYRILLIC EXTENDED-C", 3028 "CYRILLICEXTENDED-C"); 3029 3030 /** 3031 * Constant for the "Osage" Unicode 3032 * character block. 3033 * @since 11 3034 */ 3035 public static final UnicodeBlock OSAGE = 3036 new UnicodeBlock("OSAGE"); 3037 3038 /** 3039 * Constant for the "Newa" Unicode 3040 * character block. 3041 * @since 11 3042 */ 3043 public static final UnicodeBlock NEWA = 3044 new UnicodeBlock("NEWA"); 3045 3046 /** 3047 * Constant for the "Mongolian Supplement" Unicode 3048 * character block. 
3049 * @since 11 3050 */ 3051 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 3052 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", 3053 "MONGOLIAN SUPPLEMENT", 3054 "MONGOLIANSUPPLEMENT"); 3055 3056 /** 3057 * Constant for the "Marchen" Unicode 3058 * character block. 3059 * @since 11 3060 */ 3061 public static final UnicodeBlock MARCHEN = 3062 new UnicodeBlock("MARCHEN"); 3063 3064 /** 3065 * Constant for the "Ideographic Symbols and Punctuation" Unicode 3066 * character block. 3067 * @since 11 3068 */ 3069 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 3070 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", 3071 "IDEOGRAPHIC SYMBOLS AND PUNCTUATION", 3072 "IDEOGRAPHICSYMBOLSANDPUNCTUATION"); 3073 3074 /** 3075 * Constant for the "Tangut" Unicode 3076 * character block. 3077 * @since 11 3078 */ 3079 public static final UnicodeBlock TANGUT = 3080 new UnicodeBlock("TANGUT"); 3081 3082 /** 3083 * Constant for the "Tangut Components" Unicode 3084 * character block. 3085 * @since 11 3086 */ 3087 public static final UnicodeBlock TANGUT_COMPONENTS = 3088 new UnicodeBlock("TANGUT_COMPONENTS", 3089 "TANGUT COMPONENTS", 3090 "TANGUTCOMPONENTS"); 3091 3092 /** 3093 * Constant for the "Kana Extended-A" Unicode 3094 * character block. 3095 * @since 11 3096 */ 3097 public static final UnicodeBlock KANA_EXTENDED_A = 3098 new UnicodeBlock("KANA_EXTENDED_A", 3099 "KANA EXTENDED-A", 3100 "KANAEXTENDED-A"); 3101 /** 3102 * Constant for the "Glagolitic Supplement" Unicode 3103 * character block. 3104 * @since 11 3105 */ 3106 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 3107 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", 3108 "GLAGOLITIC SUPPLEMENT", 3109 "GLAGOLITICSUPPLEMENT"); 3110 /** 3111 * Constant for the "Adlam" Unicode 3112 * character block. 3113 * @since 11 3114 */ 3115 public static final UnicodeBlock ADLAM = 3116 new UnicodeBlock("ADLAM"); 3117 3118 /** 3119 * Constant for the "Masaram Gondi" Unicode 3120 * character block. 
3121 * @since 11 3122 */ 3123 public static final UnicodeBlock MASARAM_GONDI = 3124 new UnicodeBlock("MASARAM_GONDI", 3125 "MASARAM GONDI", 3126 "MASARAMGONDI"); 3127 3128 /** 3129 * Constant for the "Zanabazar Square" Unicode 3130 * character block. 3131 * @since 11 3132 */ 3133 public static final UnicodeBlock ZANABAZAR_SQUARE = 3134 new UnicodeBlock("ZANABAZAR_SQUARE", 3135 "ZANABAZAR SQUARE", 3136 "ZANABAZARSQUARE"); 3137 3138 /** 3139 * Constant for the "Nushu" Unicode 3140 * character block. 3141 * @since 11 3142 */ 3143 public static final UnicodeBlock NUSHU = 3144 new UnicodeBlock("NUSHU"); 3145 3146 /** 3147 * Constant for the "Soyombo" Unicode 3148 * character block. 3149 * @since 11 3150 */ 3151 public static final UnicodeBlock SOYOMBO = 3152 new UnicodeBlock("SOYOMBO"); 3153 3154 /** 3155 * Constant for the "Bhaiksuki" Unicode 3156 * character block. 3157 * @since 11 3158 */ 3159 public static final UnicodeBlock BHAIKSUKI = 3160 new UnicodeBlock("BHAIKSUKI"); 3161 3162 /** 3163 * Constant for the "CJK Unified Ideographs Extension F" Unicode 3164 * character block. 3165 * @since 11 3166 */ 3167 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 3168 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", 3169 "CJK UNIFIED IDEOGRAPHS EXTENSION F", 3170 "CJKUNIFIEDIDEOGRAPHSEXTENSIONF"); 3171 /** 3172 * Constant for the "Georgian Extended" Unicode 3173 * character block. 3174 * @since 12 3175 */ 3176 public static final UnicodeBlock GEORGIAN_EXTENDED = 3177 new UnicodeBlock("GEORGIAN_EXTENDED", 3178 "GEORGIAN EXTENDED", 3179 "GEORGIANEXTENDED"); 3180 3181 /** 3182 * Constant for the "Hanifi Rohingya" Unicode 3183 * character block. 3184 * @since 12 3185 */ 3186 public static final UnicodeBlock HANIFI_ROHINGYA = 3187 new UnicodeBlock("HANIFI_ROHINGYA", 3188 "HANIFI ROHINGYA", 3189 "HANIFIROHINGYA"); 3190 3191 /** 3192 * Constant for the "Old Sogdian" Unicode 3193 * character block. 
* @since 12
*/
public static final UnicodeBlock OLD_SOGDIAN =
    new UnicodeBlock("OLD_SOGDIAN",
                     "OLD SOGDIAN",
                     "OLDSOGDIAN");

/**
 * Constant for the "Sogdian" Unicode
 * character block.
 * Code points {@code U+10F30..U+10F6F}.
 * @since 12
 */
public static final UnicodeBlock SOGDIAN =
    new UnicodeBlock("SOGDIAN");

/**
 * Constant for the "Dogra" Unicode
 * character block.
 * Code points {@code U+11800..U+1184F}.
 * @since 12
 */
public static final UnicodeBlock DOGRA =
    new UnicodeBlock("DOGRA");

/**
 * Constant for the "Gunjala Gondi" Unicode
 * character block.
 * Code points {@code U+11D60..U+11DAF}.
 * @since 12
 */
public static final UnicodeBlock GUNJALA_GONDI =
    new UnicodeBlock("GUNJALA_GONDI",
                     "GUNJALA GONDI",
                     "GUNJALAGONDI");

/**
 * Constant for the "Makasar" Unicode
 * character block.
 * Code points {@code U+11EE0..U+11EFF}.
 * @since 12
 */
public static final UnicodeBlock MAKASAR =
    new UnicodeBlock("MAKASAR");

/**
 * Constant for the "Medefaidrin" Unicode
 * character block.
 * Code points {@code U+16E40..U+16E9F}.
 * @since 12
 */
public static final UnicodeBlock MEDEFAIDRIN =
    new UnicodeBlock("MEDEFAIDRIN");

/**
 * Constant for the "Mayan Numerals" Unicode
 * character block.
 * Code points {@code U+1D2E0..U+1D2FF}.
 * @since 12
 */
public static final UnicodeBlock MAYAN_NUMERALS =
    new UnicodeBlock("MAYAN_NUMERALS",
                     "MAYAN NUMERALS",
                     "MAYANNUMERALS");

/**
 * Constant for the "Indic Siyaq Numbers" Unicode
 * character block.
 * Code points {@code U+1EC70..U+1ECBF}.
 * @since 12
 */
public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
    new UnicodeBlock("INDIC_SIYAQ_NUMBERS",
                     "INDIC SIYAQ NUMBERS",
                     "INDICSIYAQNUMBERS");

/**
 * Constant for the "Chess Symbols" Unicode
 * character block.
 * Code points {@code U+1FA00..U+1FA6F}.
 * @since 12
 */
public static final UnicodeBlock CHESS_SYMBOLS =
    new UnicodeBlock("CHESS_SYMBOLS",
                     "CHESS SYMBOLS",
                     "CHESSSYMBOLS");

/**
 * Constant for the "Elymaic" Unicode
 * character block.
 * Code points {@code U+10FE0..U+10FFF}.
 * @since 13
 */
public static final UnicodeBlock ELYMAIC =
    new UnicodeBlock("ELYMAIC");

/**
 * Constant for the "Nandinagari" Unicode
 * character block.
 * Code points {@code U+119A0..U+119FF}.
 * @since 13
 */
public static final UnicodeBlock NANDINAGARI =
    new UnicodeBlock("NANDINAGARI");

/**
 * Constant for the "Tamil Supplement" Unicode
 * character block.
 * Code points {@code U+11FC0..U+11FFF}.
 * @since 13
 */
public static final UnicodeBlock TAMIL_SUPPLEMENT =
    new UnicodeBlock("TAMIL_SUPPLEMENT",
                     "TAMIL SUPPLEMENT",
                     "TAMILSUPPLEMENT");

/**
 * Constant for the "Egyptian Hieroglyph Format Controls" Unicode
 * character block.
 * Code points {@code U+13430..U+1345F}.
 * @since 13
 */
public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
    new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS",
                     "EGYPTIAN HIEROGLYPH FORMAT CONTROLS",
                     "EGYPTIANHIEROGLYPHFORMATCONTROLS");

/**
 * Constant for the "Small Kana Extension" Unicode
 * character block.
 * Code points {@code U+1B130..U+1B16F}.
 * @since 13
 */
public static final UnicodeBlock SMALL_KANA_EXTENSION =
    new UnicodeBlock("SMALL_KANA_EXTENSION",
                     "SMALL KANA EXTENSION",
                     "SMALLKANAEXTENSION");

/**
 * Constant for the "Nyiakeng Puachue Hmong" Unicode
 * character block.
 * Code points {@code U+1E100..U+1E14F}.
 * @since 13
 */
public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
    new UnicodeBlock("NYIAKENG_PUACHUE_HMONG",
                     "NYIAKENG PUACHUE HMONG",
                     "NYIAKENGPUACHUEHMONG");

/**
 * Constant for the "Wancho" Unicode
 * character block.
 * Code points {@code U+1E2C0..U+1E2FF}.
 * @since 13
 */
public static final UnicodeBlock WANCHO =
    new UnicodeBlock("WANCHO");

/**
 * Constant for the "Ottoman Siyaq Numbers" Unicode
 * character block.
 * Code points {@code U+1ED00..U+1ED4F}.
 * @since 13
 */
public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
    new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS",
                     "OTTOMAN SIYAQ NUMBERS",
                     "OTTOMANSIYAQNUMBERS");

/**
 * Constant for the "Symbols and Pictographs Extended-A" Unicode
 * character block.
 * Code points {@code U+1FA70..U+1FAFF}.
 * @since 13
 */
public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
    new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A",
                     "SYMBOLS AND PICTOGRAPHS EXTENDED-A",
                     "SYMBOLSANDPICTOGRAPHSEXTENDED-A");

/**
 * Constant for the "Yezidi" Unicode
 * character block.
 * Code points {@code U+10E80..U+10EBF}.
 * @since 15
 */
public static final UnicodeBlock YEZIDI =
    new UnicodeBlock("YEZIDI");

/**
 * Constant for the "Chorasmian" Unicode
 * character block.
 * Code points {@code U+10FB0..U+10FDF}.
 * @since 15
 */
public static final UnicodeBlock CHORASMIAN =
    new UnicodeBlock("CHORASMIAN");

/**
 * Constant for the "Dives Akuru" Unicode
 * character block.
 * Code points {@code U+11900..U+1195F}.
 * @since 15
 */
public static final UnicodeBlock DIVES_AKURU =
    new UnicodeBlock("DIVES_AKURU",
                     "DIVES AKURU",
                     "DIVESAKURU");

/**
 * Constant for the "Lisu Supplement" Unicode
 * character block.
 * Code points {@code U+11FB0..U+11FBF}.
 * @since 15
 */
public static final UnicodeBlock LISU_SUPPLEMENT =
    new UnicodeBlock("LISU_SUPPLEMENT",
                     "LISU SUPPLEMENT",
                     "LISUSUPPLEMENT");

/**
 * Constant for the "Khitan Small Script" Unicode
 * character block.
 * Code points {@code U+18B00..U+18CFF}.
 * @since 15
 */
public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
    new UnicodeBlock("KHITAN_SMALL_SCRIPT",
                     "KHITAN SMALL SCRIPT",
                     "KHITANSMALLSCRIPT");

/**
 * Constant for the "Tangut Supplement" Unicode
 * character block.
 * Code points {@code U+18D00..U+18D7F}.
 * @since 15
 */
public static final UnicodeBlock TANGUT_SUPPLEMENT =
    new UnicodeBlock("TANGUT_SUPPLEMENT",
                     "TANGUT SUPPLEMENT",
                     "TANGUTSUPPLEMENT");

/**
 * Constant for the "Symbols for Legacy Computing" Unicode
 * character block.
 * Code points {@code U+1FB00..U+1FBFF}.
 * @since 15
 */
public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
    new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING",
                     "SYMBOLS FOR LEGACY COMPUTING",
                     "SYMBOLSFORLEGACYCOMPUTING");

/**
 * Constant for the "CJK Unified Ideographs Extension G" Unicode
 * character block.
 * Code points {@code U+30000..U+3134F}.
 * @since 15
 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
    new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
                     "CJK UNIFIED IDEOGRAPHS EXTENSION G",
                     "CJKUNIFIEDIDEOGRAPHSEXTENSIONG");

/**
 * Constant for the "Arabic Extended-B" Unicode
 * character block.
 * Code points {@code U+0870..U+089F}.
 * @since 19
 */
public static final UnicodeBlock ARABIC_EXTENDED_B =
    new UnicodeBlock("ARABIC_EXTENDED_B",
                     "ARABIC EXTENDED-B",
                     "ARABICEXTENDED-B");

/**
 * Constant for the "Vithkuqi" Unicode
 * character block.
 * Code points {@code U+10570..U+105BF}.
 * @since 19
 */
public static final UnicodeBlock VITHKUQI =
    new UnicodeBlock("VITHKUQI");

/**
 * Constant for the "Latin Extended-F" Unicode
 * character block.
 * Code points {@code U+10780..U+107BF}.
 * @since 19
 */
public static final UnicodeBlock LATIN_EXTENDED_F =
    new UnicodeBlock("LATIN_EXTENDED_F",
                     "LATIN EXTENDED-F",
                     "LATINEXTENDED-F");

/**
 * Constant for the "Old Uyghur" Unicode
 * character block.
 * Code points {@code U+10F70..U+10FAF}.
 * @since 19
 */
public static final UnicodeBlock OLD_UYGHUR =
    new UnicodeBlock("OLD_UYGHUR",
                     "OLD UYGHUR",
                     "OLDUYGHUR");

/**
 * Constant for the "Unified Canadian Aboriginal Syllabics Extended-A" Unicode
 * character block.
 * Code points {@code U+11AB0..U+11ABF}.
 * @since 19
 */
public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A =
    new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A",
                     "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED-A",
                     "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED-A");

/**
 * Constant for the "Cypro-Minoan" Unicode
 * character block.
 * Code points {@code U+12F90..U+12FFF}.
 * @since 19
 */
public static final UnicodeBlock CYPRO_MINOAN =
    new UnicodeBlock("CYPRO_MINOAN",
                     "CYPRO-MINOAN",
                     "CYPRO-MINOAN");

/**
 * Constant for the "Tangsa" Unicode
 * character block.
 * Code points {@code U+16A70..U+16ACF}.
 * @since 19
 */
public static final UnicodeBlock TANGSA =
    new UnicodeBlock("TANGSA");

/**
 * Constant for the "Kana Extended-B" Unicode
 * character block.
 * Code points {@code U+1AFF0..U+1AFFF}.
 * @since 19
 */
public static final UnicodeBlock KANA_EXTENDED_B =
    new UnicodeBlock("KANA_EXTENDED_B",
                     "KANA EXTENDED-B",
                     "KANAEXTENDED-B");

/**
 * Constant for the "Znamenny Musical Notation" Unicode
 * character block.
 * Code points {@code U+1CF00..U+1CFCF}.
 * @since 19
 */
public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION =
    new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION",
                     "ZNAMENNY MUSICAL NOTATION",
                     "ZNAMENNYMUSICALNOTATION");

/**
 * Constant for the "Latin Extended-G" Unicode
 * character block.
 * Code points {@code U+1DF00..U+1DFFF}.
 * @since 19
 */
public static final UnicodeBlock LATIN_EXTENDED_G =
    new UnicodeBlock("LATIN_EXTENDED_G",
                     "LATIN EXTENDED-G",
                     "LATINEXTENDED-G");

/**
 * Constant for the "Toto" Unicode
 * character block.
 * Code points {@code U+1E290..U+1E2BF}.
 * @since 19
 */
public static final UnicodeBlock TOTO =
    new UnicodeBlock("TOTO");

/**
 * Constant for the "Ethiopic Extended-B" Unicode
 * character block.
 * Code points {@code U+1E7E0..U+1E7FF}.
 * @since 19
 */
public static final UnicodeBlock ETHIOPIC_EXTENDED_B =
    new UnicodeBlock("ETHIOPIC_EXTENDED_B",
                     "ETHIOPIC EXTENDED-B",
                     "ETHIOPICEXTENDED-B");

/**
 * Constant for the "Arabic Extended-C" Unicode
 * character block.
 * Code points {@code U+10EC0..U+10EFF}.
 * @since 20
 */
public static final UnicodeBlock ARABIC_EXTENDED_C =
    new UnicodeBlock("ARABIC_EXTENDED_C",
                     "ARABIC EXTENDED-C",
                     "ARABICEXTENDED-C");

/**
 * Constant for the "Devanagari Extended-A" Unicode
 * character block.
 * Code points {@code U+11B00..U+11B5F}.
 * @since 20
 */
public static final UnicodeBlock DEVANAGARI_EXTENDED_A =
    new UnicodeBlock("DEVANAGARI_EXTENDED_A",
                     "DEVANAGARI EXTENDED-A",
                     "DEVANAGARIEXTENDED-A");

/**
 * Constant for the "Kawi" Unicode
 * character block.
 * Code points {@code U+11F00..U+11F5F}.
 * @since 20
 */
public static final UnicodeBlock KAWI =
    new UnicodeBlock("KAWI");

/**
 * Constant for the "Kaktovik Numerals" Unicode
 * character block.
 * Code points {@code U+1D2C0..U+1D2DF}.
 * @since 20
 */
public static final UnicodeBlock KAKTOVIK_NUMERALS =
    new UnicodeBlock("KAKTOVIK_NUMERALS",
                     "KAKTOVIK NUMERALS",
                     "KAKTOVIKNUMERALS");

/**
 * Constant for the "Cyrillic Extended-D" Unicode
 * character block.
 * Code points {@code U+1E030..U+1E08F}.
 * @since 20
 */
public static final UnicodeBlock CYRILLIC_EXTENDED_D =
    new UnicodeBlock("CYRILLIC_EXTENDED_D",
                     "CYRILLIC EXTENDED-D",
                     "CYRILLICEXTENDED-D");

/**
 * Constant for the "Nag Mundari" Unicode
 * character block.
 * Code points {@code U+1E4D0..U+1E4FF}.
 * @since 20
 */
public static final UnicodeBlock NAG_MUNDARI =
    new UnicodeBlock("NAG_MUNDARI",
                     "NAG MUNDARI",
                     "NAGMUNDARI");

/**
 * Constant for the "CJK Unified Ideographs Extension H" Unicode
 * character block.
* @since 20
*/
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H =
    new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H",
                     "CJK UNIFIED IDEOGRAPHS EXTENSION H",
                     "CJKUNIFIEDIDEOGRAPHSEXTENSIONH");

/**
 * Constant for the "CJK Unified Ideographs Extension I" Unicode
 * character block.
 * @since 22
 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I =
    new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I",
                     "CJK UNIFIED IDEOGRAPHS EXTENSION I",
                     "CJKUNIFIEDIDEOGRAPHSEXTENSIONI");

/*
 * First code point of each consecutive code point range, in ascending
 * order starting at 0. Range i runs from blockStarts[i] up to, but not
 * including, blockStarts[i + 1]. Entries commented "unassigned" open a
 * gap that belongs to no block.
 *
 * This array is paired index-for-index with the blocks array: of(int)
 * binary-searches it and uses the resulting index into blocks, so the
 * two arrays must keep the same length and order, and this one must
 * stay sorted.
 */
private static final int[] blockStarts = {
    0x0000,   // 0000..007F; Basic Latin
    0x0080,   // 0080..00FF; Latin-1 Supplement
    0x0100,   // 0100..017F; Latin Extended-A
    0x0180,   // 0180..024F; Latin Extended-B
    0x0250,   // 0250..02AF; IPA Extensions
    0x02B0,   // 02B0..02FF; Spacing Modifier Letters
    0x0300,   // 0300..036F; Combining Diacritical Marks
    0x0370,   // 0370..03FF; Greek and Coptic
    0x0400,   // 0400..04FF; Cyrillic
    0x0500,   // 0500..052F; Cyrillic Supplement
    0x0530,   // 0530..058F; Armenian
    0x0590,   // 0590..05FF; Hebrew
    0x0600,   // 0600..06FF; Arabic
    0x0700,   // 0700..074F; Syriac
    0x0750,   // 0750..077F; Arabic Supplement
    0x0780,   // 0780..07BF; Thaana
    0x07C0,   // 07C0..07FF; NKo
    0x0800,   // 0800..083F; Samaritan
    0x0840,   // 0840..085F; Mandaic
    0x0860,   // 0860..086F; Syriac Supplement
    0x0870,   // 0870..089F; Arabic Extended-B
    0x08A0,   // 08A0..08FF; Arabic Extended-A
    0x0900,   // 0900..097F; Devanagari
    0x0980,   // 0980..09FF; Bengali
    0x0A00,   // 0A00..0A7F; Gurmukhi
    0x0A80,   // 0A80..0AFF; Gujarati
    0x0B00,   // 0B00..0B7F; Oriya
    0x0B80,   // 0B80..0BFF; Tamil
    0x0C00,   // 0C00..0C7F; Telugu
    0x0C80,   // 0C80..0CFF; Kannada
    0x0D00,   // 0D00..0D7F; Malayalam
    0x0D80,   // 0D80..0DFF; Sinhala
    0x0E00,   // 0E00..0E7F; Thai
    0x0E80,   // 0E80..0EFF; Lao
    0x0F00,   // 0F00..0FFF; Tibetan
    0x1000,   // 1000..109F; Myanmar
    0x10A0,   // 10A0..10FF; Georgian
    0x1100,   // 1100..11FF; Hangul Jamo
    0x1200,   // 1200..137F; Ethiopic
    0x1380,   // 1380..139F; Ethiopic Supplement
    0x13A0,   // 13A0..13FF; Cherokee
    0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
    0x1680,   // 1680..169F; Ogham
    0x16A0,   // 16A0..16FF; Runic
    0x1700,   // 1700..171F; Tagalog
    0x1720,   // 1720..173F; Hanunoo
    0x1740,   // 1740..175F; Buhid
    0x1760,   // 1760..177F; Tagbanwa
    0x1780,   // 1780..17FF; Khmer
    0x1800,   // 1800..18AF; Mongolian
    0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
    0x1900,   // 1900..194F; Limbu
    0x1950,   // 1950..197F; Tai Le
    0x1980,   // 1980..19DF; New Tai Lue
    0x19E0,   // 19E0..19FF; Khmer Symbols
    0x1A00,   // 1A00..1A1F; Buginese
    0x1A20,   // 1A20..1AAF; Tai Tham
    0x1AB0,   // 1AB0..1AFF; Combining Diacritical Marks Extended
    0x1B00,   // 1B00..1B7F; Balinese
    0x1B80,   // 1B80..1BBF; Sundanese
    0x1BC0,   // 1BC0..1BFF; Batak
    0x1C00,   // 1C00..1C4F; Lepcha
    0x1C50,   // 1C50..1C7F; Ol Chiki
    0x1C80,   // 1C80..1C8F; Cyrillic Extended-C
    0x1C90,   // 1C90..1CBF; Georgian Extended
    0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
    0x1CD0,   // 1CD0..1CFF; Vedic Extensions
    0x1D00,   // 1D00..1D7F; Phonetic Extensions
    0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
    0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
    0x1E00,   // 1E00..1EFF; Latin Extended Additional
    0x1F00,   // 1F00..1FFF; Greek Extended
    0x2000,   // 2000..206F; General Punctuation
    0x2070,   // 2070..209F; Superscripts and Subscripts
    0x20A0,   // 20A0..20CF; Currency Symbols
    0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
    0x2100,   // 2100..214F; Letterlike Symbols
    0x2150,   // 2150..218F; Number Forms
    0x2190,   // 2190..21FF; Arrows
    0x2200,   // 2200..22FF; Mathematical Operators
    0x2300,   // 2300..23FF; Miscellaneous Technical
    0x2400,   // 2400..243F; Control Pictures
    0x2440,   // 2440..245F; Optical Character Recognition
    0x2460,   // 2460..24FF; Enclosed Alphanumerics
    0x2500,   // 2500..257F; Box Drawing
    0x2580,   // 2580..259F; Block Elements
    0x25A0,   // 25A0..25FF; Geometric Shapes
    0x2600,   // 2600..26FF; Miscellaneous Symbols
    0x2700,   // 2700..27BF; Dingbats
    0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
    0x27F0,   // 27F0..27FF; Supplemental Arrows-A
    0x2800,   // 2800..28FF; Braille Patterns
    0x2900,   // 2900..297F; Supplemental Arrows-B
    0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
    0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
    0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
    0x2C00,   // 2C00..2C5F; Glagolitic
    0x2C60,   // 2C60..2C7F; Latin Extended-C
    0x2C80,   // 2C80..2CFF; Coptic
    0x2D00,   // 2D00..2D2F; Georgian Supplement
    0x2D30,   // 2D30..2D7F; Tifinagh
    0x2D80,   // 2D80..2DDF; Ethiopic Extended
    0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
    0x2E00,   // 2E00..2E7F; Supplemental Punctuation
    0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
    0x2F00,   // 2F00..2FDF; Kangxi Radicals
    0x2FE0,   //             unassigned
    0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
    0x3000,   // 3000..303F; CJK Symbols and Punctuation
    0x3040,   // 3040..309F; Hiragana
    0x30A0,   // 30A0..30FF; Katakana
    0x3100,   // 3100..312F; Bopomofo
    0x3130,   // 3130..318F; Hangul Compatibility Jamo
    0x3190,   // 3190..319F; Kanbun
    0x31A0,   // 31A0..31BF; Bopomofo Extended
    0x31C0,   // 31C0..31EF; CJK Strokes
    0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
    0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
    0x3300,   // 3300..33FF; CJK Compatibility
    0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
    0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
    0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
    0xA000,   // A000..A48F; Yi Syllables
    0xA490,   // A490..A4CF; Yi Radicals
    0xA4D0,   // A4D0..A4FF; Lisu
    0xA500,   // A500..A63F; Vai
    0xA640,   // A640..A69F; Cyrillic Extended-B
    0xA6A0,   // A6A0..A6FF; Bamum
    0xA700,   // A700..A71F; Modifier Tone Letters
    0xA720,   // A720..A7FF; Latin Extended-D
    0xA800,   // A800..A82F; Syloti Nagri
    0xA830,   // A830..A83F; Common Indic Number Forms
    0xA840,   // A840..A87F; Phags-pa
    0xA880,   // A880..A8DF; Saurashtra
    0xA8E0,   // A8E0..A8FF; Devanagari Extended
    0xA900,   // A900..A92F; Kayah Li
    0xA930,   // A930..A95F; Rejang
    0xA960,   // A960..A97F; Hangul Jamo Extended-A
    0xA980,   // A980..A9DF; Javanese
    0xA9E0,   // A9E0..A9FF; Myanmar Extended-B
    0xAA00,   // AA00..AA5F; Cham
    0xAA60,   // AA60..AA7F; Myanmar Extended-A
    0xAA80,   // AA80..AADF; Tai Viet
    0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
    0xAB00,   // AB00..AB2F; Ethiopic Extended-A
    0xAB30,   // AB30..AB6F; Latin Extended-E
    0xAB70,   // AB70..ABBF; Cherokee Supplement
    0xABC0,   // ABC0..ABFF; Meetei Mayek
    0xAC00,   // AC00..D7AF; Hangul Syllables
    0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
    0xD800,   // D800..DB7F; High Surrogates
    0xDB80,   // DB80..DBFF; High Private Use Surrogates
    0xDC00,   // DC00..DFFF; Low Surrogates
    0xE000,   // E000..F8FF; Private Use Area
    0xF900,   // F900..FAFF; CJK Compatibility Ideographs
    0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
    0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
    0xFE00,   // FE00..FE0F; Variation Selectors
    0xFE10,   // FE10..FE1F; Vertical Forms
    0xFE20,   // FE20..FE2F; Combining Half Marks
    0xFE30,   // FE30..FE4F; CJK Compatibility Forms
    0xFE50,   // FE50..FE6F; Small Form Variants
    0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
    0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
    0xFFF0,   // FFF0..FFFF; Specials
    0x10000,  // 10000..1007F; Linear B Syllabary
    0x10080,  // 10080..100FF; Linear B Ideograms
    0x10100,  // 10100..1013F; Aegean Numbers
    0x10140,  // 10140..1018F; Ancient Greek Numbers
    0x10190,  // 10190..101CF; Ancient Symbols
    0x101D0,  // 101D0..101FF; Phaistos Disc
    0x10200,  //               unassigned
    0x10280,  // 10280..1029F; Lycian
    0x102A0,  // 102A0..102DF; Carian
    0x102E0,  // 102E0..102FF; Coptic Epact Numbers
    0x10300,  // 10300..1032F; Old Italic
    0x10330,  // 10330..1034F; Gothic
    0x10350,  // 10350..1037F; Old Permic
    0x10380,  // 10380..1039F; Ugaritic
    0x103A0,  // 103A0..103DF; Old Persian
    0x103E0,  //               unassigned
    0x10400,  // 10400..1044F; Deseret
    0x10450,  // 10450..1047F; Shavian
    0x10480,  // 10480..104AF; Osmanya
    0x104B0,  // 104B0..104FF; Osage
    0x10500,  // 10500..1052F; Elbasan
    0x10530,  // 10530..1056F; Caucasian Albanian
    0x10570,  // 10570..105BF; Vithkuqi
    0x105C0,  //               unassigned
    0x10600,  // 10600..1077F; Linear A
    0x10780,  // 10780..107BF; Latin Extended-F
    0x107C0,  //               unassigned
    0x10800,  // 10800..1083F; Cypriot Syllabary
    0x10840,  // 10840..1085F; Imperial Aramaic
    0x10860,  // 10860..1087F; Palmyrene
    0x10880,  // 10880..108AF; Nabataean
    0x108B0,  //               unassigned
    0x108E0,  // 108E0..108FF; Hatran
    0x10900,  // 10900..1091F; Phoenician
    0x10920,  // 10920..1093F; Lydian
    0x10940,  //               unassigned
    0x10980,  // 10980..1099F; Meroitic Hieroglyphs
    0x109A0,  // 109A0..109FF; Meroitic Cursive
    0x10A00,  // 10A00..10A5F; Kharoshthi
    0x10A60,  // 10A60..10A7F; Old South Arabian
    0x10A80,  // 10A80..10A9F; Old North Arabian
    0x10AA0,  //               unassigned
    0x10AC0,  // 10AC0..10AFF; Manichaean
    0x10B00,  // 10B00..10B3F; Avestan
    0x10B40,  // 10B40..10B5F; Inscriptional Parthian
    0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
    0x10B80,  // 10B80..10BAF; Psalter Pahlavi
    0x10BB0,  //               unassigned
    0x10C00,  // 10C00..10C4F; Old Turkic
    0x10C50,  //               unassigned
    0x10C80,  // 10C80..10CFF; Old Hungarian
    0x10D00,  // 10D00..10D3F; Hanifi Rohingya
    0x10D40,  //               unassigned
    0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
    0x10E80,  // 10E80..10EBF; Yezidi
    0x10EC0,  // 10EC0..10EFF; Arabic Extended-C
    0x10F00,  // 10F00..10F2F; Old Sogdian
    0x10F30,  // 10F30..10F6F; Sogdian
    0x10F70,  // 10F70..10FAF; Old Uyghur
    0x10FB0,  // 10FB0..10FDF; Chorasmian
    0x10FE0,  // 10FE0..10FFF; Elymaic
    0x11000,  // 11000..1107F; Brahmi
    0x11080,  // 11080..110CF; Kaithi
    0x110D0,  // 110D0..110FF; Sora Sompeng
    0x11100,  // 11100..1114F; Chakma
    0x11150,  // 11150..1117F; Mahajani
    0x11180,  // 11180..111DF; Sharada
    0x111E0,  // 111E0..111FF; Sinhala Archaic Numbers
    0x11200,  // 11200..1124F; Khojki
    0x11250,  //               unassigned
    0x11280,  // 11280..112AF; Multani
    0x112B0,  // 112B0..112FF; Khudawadi
    0x11300,  // 11300..1137F; Grantha
    0x11380,  //               unassigned
    0x11400,  // 11400..1147F; Newa
    0x11480,  // 11480..114DF; Tirhuta
    0x114E0,  //               unassigned
    0x11580,  // 11580..115FF; Siddham
    0x11600,  // 11600..1165F; Modi
    0x11660,  // 11660..1167F; Mongolian Supplement
    0x11680,  // 11680..116CF; Takri
    0x116D0,  //               unassigned
    0x11700,  // 11700..1174F; Ahom
    0x11750,  //               unassigned
    0x11800,  // 11800..1184F; Dogra
    0x11850,  //               unassigned
    0x118A0,  // 118A0..118FF; Warang Citi
    0x11900,  // 11900..1195F; Dives Akuru
    0x11960,  //               unassigned
    0x119A0,  // 119A0..119FF; Nandinagari
    0x11A00,  // 11A00..11A4F; Zanabazar Square
    0x11A50,  // 11A50..11AAF; Soyombo
    0x11AB0,  // 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
    0x11AC0,  // 11AC0..11AFF; Pau Cin Hau
    0x11B00,  // 11B00..11B5F; Devanagari Extended-A
    0x11B60,  //               unassigned
    0x11C00,  // 11C00..11C6F; Bhaiksuki
    0x11C70,  // 11C70..11CBF; Marchen
    0x11CC0,  //               unassigned
    0x11D00,  // 11D00..11D5F; Masaram Gondi
    0x11D60,  // 11D60..11DAF; Gunjala Gondi
    0x11DB0,  //               unassigned
    0x11EE0,  // 11EE0..11EFF; Makasar
    0x11F00,  // 11F00..11F5F; Kawi
    0x11F60,  //               unassigned
    0x11FB0,  // 11FB0..11FBF; Lisu Supplement
    0x11FC0,  // 11FC0..11FFF; Tamil Supplement
    0x12000,  // 12000..123FF; Cuneiform
    0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
    0x12480,  // 12480..1254F; Early Dynastic Cuneiform
    0x12550,  //               unassigned
    0x12F90,  // 12F90..12FFF; Cypro-Minoan
    0x13000,  // 13000..1342F; Egyptian Hieroglyphs
    0x13430,  // 13430..1345F; Egyptian Hieroglyph Format Controls
    0x13460,  //               unassigned
    0x14400,  // 14400..1467F; Anatolian Hieroglyphs
    0x14680,  //               unassigned
    0x16800,  // 16800..16A3F; Bamum Supplement
    0x16A40,  // 16A40..16A6F; Mro
    0x16A70,  // 16A70..16ACF; Tangsa
    0x16AD0,  // 16AD0..16AFF; Bassa Vah
    0x16B00,  // 16B00..16B8F; Pahawh Hmong
    0x16B90,  //               unassigned
    0x16E40,  // 16E40..16E9F; Medefaidrin
    0x16EA0,  //               unassigned
    0x16F00,  // 16F00..16F9F; Miao
    0x16FA0,  //               unassigned
    0x16FE0,  // 16FE0..16FFF; Ideographic Symbols and Punctuation
    0x17000,  // 17000..187FF; Tangut
    0x18800,  // 18800..18AFF; Tangut Components
    0x18B00,  // 18B00..18CFF; Khitan Small Script
    0x18D00,  // 18D00..18D7F; Tangut Supplement
    0x18D80,  //               unassigned
    0x1AFF0,  // 1AFF0..1AFFF; Kana Extended-B
    0x1B000,  // 1B000..1B0FF; Kana Supplement
    0x1B100,  // 1B100..1B12F; Kana Extended-A
    0x1B130,  // 1B130..1B16F; Small Kana Extension
    0x1B170,  // 1B170..1B2FF; Nushu
    0x1B300,  //               unassigned
    0x1BC00,  // 1BC00..1BC9F; Duployan
    0x1BCA0,  // 1BCA0..1BCAF; Shorthand Format Controls
    0x1BCB0,  //               unassigned
    0x1CF00,  // 1CF00..1CFCF; Znamenny Musical Notation
    0x1CFD0,  //               unassigned
    0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
    0x1D100,  // 1D100..1D1FF; Musical Symbols
    0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
    0x1D250,  //               unassigned
    0x1D2C0,  // 1D2C0..1D2DF; Kaktovik Numerals
    0x1D2E0,  // 1D2E0..1D2FF; Mayan Numerals
    0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
    0x1D360,  // 1D360..1D37F; Counting Rod Numerals
    0x1D380,  //               unassigned
    0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
    0x1D800,  // 1D800..1DAAF; Sutton SignWriting
    0x1DAB0,  //               unassigned
    0x1DF00,  // 1DF00..1DFFF; Latin Extended-G
    0x1E000,  // 1E000..1E02F; Glagolitic Supplement
    0x1E030,  // 1E030..1E08F; Cyrillic Extended-D
    0x1E090,  //               unassigned
    0x1E100,  // 1E100..1E14F; Nyiakeng Puachue Hmong
    0x1E150,  //               unassigned
    0x1E290,  // 1E290..1E2BF; Toto
    0x1E2C0,  // 1E2C0..1E2FF; Wancho
    0x1E300,  //               unassigned
    0x1E4D0,  // 1E4D0..1E4FF; Nag Mundari
    0x1E500,  //               unassigned
    0x1E7E0,  // 1E7E0..1E7FF; Ethiopic Extended-B
    0x1E800,  // 1E800..1E8DF; Mende Kikakui
    0x1E8E0,  //               unassigned
    0x1E900,  // 1E900..1E95F; Adlam
    0x1E960,  //               unassigned
    0x1EC70,  // 1EC70..1ECBF; Indic Siyaq Numbers
    0x1ECC0,  //               unassigned
    0x1ED00,  // 1ED00..1ED4F; Ottoman Siyaq Numbers
    0x1ED50,  //               unassigned
    0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
    0x1EF00,  //               unassigned
    0x1F000,  // 1F000..1F02F; Mahjong Tiles
    0x1F030,  // 1F030..1F09F; Domino Tiles
    0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
    0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
    0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
    0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
    0x1F600,  // 1F600..1F64F; Emoticons
    0x1F650,  // 1F650..1F67F; Ornamental Dingbats
    0x1F680,  // 1F680..1F6FF; Transport and Map Symbols
    0x1F700,  // 1F700..1F77F; Alchemical Symbols
    0x1F780,  // 1F780..1F7FF; Geometric Shapes Extended
    0x1F800,  // 1F800..1F8FF; Supplemental Arrows-C
    0x1F900,  // 1F900..1F9FF; Supplemental Symbols and Pictographs
    0x1FA00,  // 1FA00..1FA6F; Chess Symbols
    0x1FA70,  // 1FA70..1FAFF; Symbols and Pictographs Extended-A
    0x1FB00,  // 1FB00..1FBFF; Symbols for Legacy Computing
    0x1FC00,  //               unassigned
    0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
    0x2A6E0,  //               unassigned
    0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
    0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
    0x2B820,  // 2B820..2CEAF; CJK Unified Ideographs Extension E
    0x2CEB0,  // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
    0x2EBF0,  // 2EBF0..2EE5F; CJK Unified Ideographs Extension I
    0x2EE60,  //               unassigned
    0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
    0x2FA20,  //               unassigned
    0x30000,  // 30000..3134F; CJK Unified Ideographs Extension G
    0x31350,  // 31350..323AF; CJK Unified Ideographs Extension H
    0x323B0,  //               unassigned
    0xE0000,  // E0000..E007F; Tags
    0xE0080,  //               unassigned
    0xE0100,  // E0100..E01EF; Variation Selectors Supplement
    0xE01F0,  //               unassigned
    0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
    0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
};

/*
 * The block for each range in blockStarts, index for index: blocks[i]
 * is the block whose first code point is blockStarts[i]. A null entry
 * pairs with an "unassigned" range, so of(int) returns null for code
 * points that fall there. Adding or removing an entry here requires the
 * matching change at the same position in blockStarts.
 */
private static final UnicodeBlock[] blocks = {
    BASIC_LATIN,
    LATIN_1_SUPPLEMENT,
    LATIN_EXTENDED_A,
    LATIN_EXTENDED_B,
    IPA_EXTENSIONS,
    SPACING_MODIFIER_LETTERS,
    COMBINING_DIACRITICAL_MARKS,
    GREEK,
    CYRILLIC,
    CYRILLIC_SUPPLEMENTARY,
    ARMENIAN,
    HEBREW,
    ARABIC,
    SYRIAC,
    ARABIC_SUPPLEMENT,
    THAANA,
    NKO,
    SAMARITAN,
    MANDAIC,
    SYRIAC_SUPPLEMENT,
    ARABIC_EXTENDED_B,
    ARABIC_EXTENDED_A,
    DEVANAGARI,
    BENGALI,
    GURMUKHI,
    GUJARATI,
    ORIYA,
    TAMIL,
    TELUGU,
    KANNADA,
    MALAYALAM,
    SINHALA,
    THAI,
    LAO,
    TIBETAN,
    MYANMAR,
    GEORGIAN,
    HANGUL_JAMO,
    ETHIOPIC,
    ETHIOPIC_SUPPLEMENT,
    CHEROKEE,
    UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
    OGHAM,
    RUNIC,
    TAGALOG,
    HANUNOO,
    BUHID,
    TAGBANWA,
    KHMER,
    MONGOLIAN,
    UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
    LIMBU,
    TAI_LE,
    NEW_TAI_LUE,
    KHMER_SYMBOLS,
    BUGINESE,
    TAI_THAM,
    COMBINING_DIACRITICAL_MARKS_EXTENDED,
    BALINESE,
    SUNDANESE,
    BATAK,
    LEPCHA,
    OL_CHIKI,
    CYRILLIC_EXTENDED_C,
    GEORGIAN_EXTENDED,
    SUNDANESE_SUPPLEMENT,
    VEDIC_EXTENSIONS,
    PHONETIC_EXTENSIONS,
    PHONETIC_EXTENSIONS_SUPPLEMENT,
    COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
    LATIN_EXTENDED_ADDITIONAL,
    GREEK_EXTENDED,
    GENERAL_PUNCTUATION,
    SUPERSCRIPTS_AND_SUBSCRIPTS,
    CURRENCY_SYMBOLS,
    COMBINING_MARKS_FOR_SYMBOLS,
    LETTERLIKE_SYMBOLS,
    NUMBER_FORMS,
    ARROWS,
    MATHEMATICAL_OPERATORS,
    MISCELLANEOUS_TECHNICAL,
    CONTROL_PICTURES,
    OPTICAL_CHARACTER_RECOGNITION,
    ENCLOSED_ALPHANUMERICS,
    BOX_DRAWING,
    BLOCK_ELEMENTS,
    GEOMETRIC_SHAPES,
    MISCELLANEOUS_SYMBOLS,
    DINGBATS,
    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
    SUPPLEMENTAL_ARROWS_A,
    BRAILLE_PATTERNS,
    SUPPLEMENTAL_ARROWS_B,
    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
    SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
    MISCELLANEOUS_SYMBOLS_AND_ARROWS,
    GLAGOLITIC,
    LATIN_EXTENDED_C,
    COPTIC,
    GEORGIAN_SUPPLEMENT,
    TIFINAGH,
    ETHIOPIC_EXTENDED,
    CYRILLIC_EXTENDED_A,
    SUPPLEMENTAL_PUNCTUATION,
    CJK_RADICALS_SUPPLEMENT,
    KANGXI_RADICALS,
    null,
    IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
    CJK_SYMBOLS_AND_PUNCTUATION,
    HIRAGANA,
    KATAKANA,
    BOPOMOFO,
    HANGUL_COMPATIBILITY_JAMO,
    KANBUN,
    BOPOMOFO_EXTENDED,
    CJK_STROKES,
    KATAKANA_PHONETIC_EXTENSIONS,
    ENCLOSED_CJK_LETTERS_AND_MONTHS,
    CJK_COMPATIBILITY,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
    YIJING_HEXAGRAM_SYMBOLS,
    CJK_UNIFIED_IDEOGRAPHS,
    YI_SYLLABLES,
    YI_RADICALS,
    LISU,
    VAI,
    CYRILLIC_EXTENDED_B,
    BAMUM,
    MODIFIER_TONE_LETTERS,
    LATIN_EXTENDED_D,
    SYLOTI_NAGRI,
    COMMON_INDIC_NUMBER_FORMS,
    PHAGS_PA,
    SAURASHTRA,
    DEVANAGARI_EXTENDED,
    KAYAH_LI,
    REJANG,
    HANGUL_JAMO_EXTENDED_A,
    JAVANESE,
    MYANMAR_EXTENDED_B,
    CHAM,
    MYANMAR_EXTENDED_A,
    TAI_VIET,
    MEETEI_MAYEK_EXTENSIONS,
    ETHIOPIC_EXTENDED_A,
    LATIN_EXTENDED_E,
    CHEROKEE_SUPPLEMENT,
    MEETEI_MAYEK,
    HANGUL_SYLLABLES,
    HANGUL_JAMO_EXTENDED_B,
    HIGH_SURROGATES,
    HIGH_PRIVATE_USE_SURROGATES,
    LOW_SURROGATES,
    PRIVATE_USE_AREA,
    CJK_COMPATIBILITY_IDEOGRAPHS,
    ALPHABETIC_PRESENTATION_FORMS,
    ARABIC_PRESENTATION_FORMS_A,
    VARIATION_SELECTORS,
    VERTICAL_FORMS,
    COMBINING_HALF_MARKS,
    CJK_COMPATIBILITY_FORMS,
    SMALL_FORM_VARIANTS,
    ARABIC_PRESENTATION_FORMS_B,
    HALFWIDTH_AND_FULLWIDTH_FORMS,
    SPECIALS,
    LINEAR_B_SYLLABARY,
    LINEAR_B_IDEOGRAMS,
    AEGEAN_NUMBERS,
    ANCIENT_GREEK_NUMBERS,
    ANCIENT_SYMBOLS,
    PHAISTOS_DISC,
    null,
    LYCIAN,
    CARIAN,
    COPTIC_EPACT_NUMBERS,
    OLD_ITALIC,
    GOTHIC,
    OLD_PERMIC,
    UGARITIC,
    OLD_PERSIAN,
    null,
    DESERET,
    SHAVIAN,
    OSMANYA,
    OSAGE,
    ELBASAN,
    CAUCASIAN_ALBANIAN,
    VITHKUQI,
    null,
    LINEAR_A,
    LATIN_EXTENDED_F,
    null,
    CYPRIOT_SYLLABARY,
    IMPERIAL_ARAMAIC,
    PALMYRENE,
    NABATAEAN,
    null,
    HATRAN,
    PHOENICIAN,
    LYDIAN,
    null,
    MEROITIC_HIEROGLYPHS,
    MEROITIC_CURSIVE,
    KHAROSHTHI,
    OLD_SOUTH_ARABIAN,
    OLD_NORTH_ARABIAN,
    null,
    MANICHAEAN,
    AVESTAN,
    INSCRIPTIONAL_PARTHIAN,
    INSCRIPTIONAL_PAHLAVI,
    PSALTER_PAHLAVI,
    null,
    OLD_TURKIC,
    null,
    OLD_HUNGARIAN,
    HANIFI_ROHINGYA,
    null,
    RUMI_NUMERAL_SYMBOLS,
    YEZIDI,
    ARABIC_EXTENDED_C,
    OLD_SOGDIAN,
    SOGDIAN,
    OLD_UYGHUR,
    CHORASMIAN,
    ELYMAIC,
    BRAHMI,
    KAITHI,
    SORA_SOMPENG,
    CHAKMA,
    MAHAJANI,
    SHARADA,
    SINHALA_ARCHAIC_NUMBERS,
    KHOJKI,
    null,
    MULTANI,
    KHUDAWADI,
    GRANTHA,
    null,
    NEWA,
    TIRHUTA,
    null,
    SIDDHAM,
    MODI,
    MONGOLIAN_SUPPLEMENT,
    TAKRI,
    null,
    AHOM,
    null,
    DOGRA,
    null,
    WARANG_CITI,
    DIVES_AKURU,
    null,
    NANDINAGARI,
    ZANABAZAR_SQUARE,
    SOYOMBO,
    UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A,
    PAU_CIN_HAU,
    DEVANAGARI_EXTENDED_A,
    null,
    BHAIKSUKI,
    MARCHEN,
    null,
    MASARAM_GONDI,
    GUNJALA_GONDI,
    null,
    MAKASAR,
    KAWI,
    null,
    LISU_SUPPLEMENT,
    TAMIL_SUPPLEMENT,
    CUNEIFORM,
    CUNEIFORM_NUMBERS_AND_PUNCTUATION,
    EARLY_DYNASTIC_CUNEIFORM,
    null,
    CYPRO_MINOAN,
    EGYPTIAN_HIEROGLYPHS,
    EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS,
    null,
    ANATOLIAN_HIEROGLYPHS,
    null,
    BAMUM_SUPPLEMENT,
    MRO,
    TANGSA,
    BASSA_VAH,
    PAHAWH_HMONG,
    null,
    MEDEFAIDRIN,
    null,
    MIAO,
    null,
    IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
    TANGUT,
    TANGUT_COMPONENTS,
    KHITAN_SMALL_SCRIPT,
    TANGUT_SUPPLEMENT,
    null,
    KANA_EXTENDED_B,
    KANA_SUPPLEMENT,
    KANA_EXTENDED_A,
    SMALL_KANA_EXTENSION,
    NUSHU,
    null,
    DUPLOYAN,
    SHORTHAND_FORMAT_CONTROLS,
    null,
    ZNAMENNY_MUSICAL_NOTATION,
    null,
    BYZANTINE_MUSICAL_SYMBOLS,
    MUSICAL_SYMBOLS,
    ANCIENT_GREEK_MUSICAL_NOTATION,
    null,
    KAKTOVIK_NUMERALS,
    MAYAN_NUMERALS,
    TAI_XUAN_JING_SYMBOLS,
    COUNTING_ROD_NUMERALS,
    null,
    MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
    SUTTON_SIGNWRITING,
    null,
    LATIN_EXTENDED_G,
    GLAGOLITIC_SUPPLEMENT,
    CYRILLIC_EXTENDED_D,
    null,
    NYIAKENG_PUACHUE_HMONG,
    null,
    TOTO,
    WANCHO,
    null,
    NAG_MUNDARI,
    null,
    ETHIOPIC_EXTENDED_B,
    MENDE_KIKAKUI,
    null,
    ADLAM,
    null,
    INDIC_SIYAQ_NUMBERS,
    null,
    OTTOMAN_SIYAQ_NUMBERS,
    null,
    ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
    null,
    MAHJONG_TILES,
    DOMINO_TILES,
    PLAYING_CARDS,
    ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
    ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
    MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
    EMOTICONS,
    ORNAMENTAL_DINGBATS,
    TRANSPORT_AND_MAP_SYMBOLS,
    ALCHEMICAL_SYMBOLS,
    GEOMETRIC_SHAPES_EXTENDED,
    SUPPLEMENTAL_ARROWS_C,
    SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
    CHESS_SYMBOLS,
    SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A,
    SYMBOLS_FOR_LEGACY_COMPUTING,
    null,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
    null,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I,
    null,
    CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
    null,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H,
    null,
    TAGS,
    null,
    VARIATION_SELECTORS_SUPPLEMENT,
    null,
    SUPPLEMENTARY_PRIVATE_USE_AREA_A,
    SUPPLEMENTARY_PRIVATE_USE_AREA_B,
};


/**
 * Returns the object representing the Unicode block containing the
 * given character, or {@code null} if the character is not a
 * member of a defined block.
 *
 * <p><b>Note:</b> This method cannot handle
 * <a href="Character.html#supplementary"> supplementary
 * characters</a>. To support all Unicode characters, including
 * supplementary characters, use the {@link #of(int)} method.
4399 * 4400 * @param c The character in question 4401 * @return The {@code UnicodeBlock} instance representing the 4402 * Unicode block of which this character is a member, or 4403 * {@code null} if the character is not a member of any 4404 * Unicode block 4405 */ 4406 public static UnicodeBlock of(char c) { 4407 return of((int)c); 4408 } 4409 4410 /** 4411 * Returns the object representing the Unicode block 4412 * containing the given character (Unicode code point), or 4413 * {@code null} if the character is not a member of a 4414 * defined block. 4415 * 4416 * @param codePoint the character (Unicode code point) in question. 4417 * @return The {@code UnicodeBlock} instance representing the 4418 * Unicode block of which this character is a member, or 4419 * {@code null} if the character is not a member of any 4420 * Unicode block 4421 * @throws IllegalArgumentException if the specified 4422 * {@code codePoint} is an invalid Unicode code point. 4423 * @see Character#isValidCodePoint(int) 4424 * @since 1.5 4425 */ 4426 public static UnicodeBlock of(int codePoint) { 4427 if (!isValidCodePoint(codePoint)) { 4428 throw new IllegalArgumentException( 4429 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 4430 } 4431 4432 int top, bottom, current; 4433 bottom = 0; 4434 top = blockStarts.length; 4435 current = top/2; 4436 4437 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 4438 while (top - bottom > 1) { 4439 if (codePoint >= blockStarts[current]) { 4440 bottom = current; 4441 } else { 4442 top = current; 4443 } 4444 current = (top + bottom) / 2; 4445 } 4446 return blocks[current]; 4447 } 4448 4449 /** 4450 * Returns the UnicodeBlock with the given name. Block 4451 * names are determined by The Unicode Standard. The file 4452 * {@code Blocks.txt} defines blocks for a particular 4453 * version of the standard. The {@link Character} class specifies 4454 * the version of the standard that it supports. 
4455 * <p> 4456 * This method accepts block names in the following forms: 4457 * <ol> 4458 * <li> Canonical block names as defined by the Unicode Standard. 4459 * For example, the standard defines a "Basic Latin" block. Therefore, this 4460 * method accepts "Basic Latin" as a valid block name. The documentation of 4461 * each UnicodeBlock provides the canonical name. 4462 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 4463 * is a valid block name for the "Basic Latin" block. 4464 * <li>The text representation of each constant UnicodeBlock identifier. 4465 * For example, this method will return the {@link #BASIC_LATIN} block if 4466 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 4467 * hyphens in the canonical name with underscores. 4468 * </ol> 4469 * Finally, character case is ignored for all of the valid block name forms. 4470 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 4471 * The en_US locale's case mapping rules are used to provide case-insensitive 4472 * string comparisons for block name validation. 4473 * <p> 4474 * If the Unicode Standard changes block names, both the previous and 4475 * current names will be accepted. 4476 * 4477 * @param blockName A {@code UnicodeBlock} name. 
4478 * @return The {@code UnicodeBlock} instance identified 4479 * by {@code blockName} 4480 * @throws IllegalArgumentException if {@code blockName} is an 4481 * invalid name 4482 * @throws NullPointerException if {@code blockName} is null 4483 * @since 1.5 4484 */ 4485 public static final UnicodeBlock forName(String blockName) { 4486 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 4487 if (block == null) { 4488 throw new IllegalArgumentException("Not a valid block name: " 4489 + blockName); 4490 } 4491 return block; 4492 } 4493 } 4494 4495 4496 /** 4497 * A family of character subsets representing the character scripts 4498 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 4499 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 4500 * character is assigned to a single Unicode script, either a specific 4501 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 4502 * one of the following three special values, 4503 * {@link Character.UnicodeScript#INHERITED Inherited}, 4504 * {@link Character.UnicodeScript#COMMON Common} or 4505 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 4506 * 4507 * @spec https://www.unicode.org/reports/tr24 Unicode Script Property 4508 * @since 1.7 4509 */ 4510 public static enum UnicodeScript { 4511 /** 4512 * Unicode script "Common". 4513 */ 4514 COMMON, 4515 4516 /** 4517 * Unicode script "Latin". 4518 */ 4519 LATIN, 4520 4521 /** 4522 * Unicode script "Greek". 4523 */ 4524 GREEK, 4525 4526 /** 4527 * Unicode script "Cyrillic". 4528 */ 4529 CYRILLIC, 4530 4531 /** 4532 * Unicode script "Armenian". 4533 */ 4534 ARMENIAN, 4535 4536 /** 4537 * Unicode script "Hebrew". 4538 */ 4539 HEBREW, 4540 4541 /** 4542 * Unicode script "Arabic". 4543 */ 4544 ARABIC, 4545 4546 /** 4547 * Unicode script "Syriac". 4548 */ 4549 SYRIAC, 4550 4551 /** 4552 * Unicode script "Thaana". 4553 */ 4554 THAANA, 4555 4556 /** 4557 * Unicode script "Devanagari". 
4558 */ 4559 DEVANAGARI, 4560 4561 /** 4562 * Unicode script "Bengali". 4563 */ 4564 BENGALI, 4565 4566 /** 4567 * Unicode script "Gurmukhi". 4568 */ 4569 GURMUKHI, 4570 4571 /** 4572 * Unicode script "Gujarati". 4573 */ 4574 GUJARATI, 4575 4576 /** 4577 * Unicode script "Oriya". 4578 */ 4579 ORIYA, 4580 4581 /** 4582 * Unicode script "Tamil". 4583 */ 4584 TAMIL, 4585 4586 /** 4587 * Unicode script "Telugu". 4588 */ 4589 TELUGU, 4590 4591 /** 4592 * Unicode script "Kannada". 4593 */ 4594 KANNADA, 4595 4596 /** 4597 * Unicode script "Malayalam". 4598 */ 4599 MALAYALAM, 4600 4601 /** 4602 * Unicode script "Sinhala". 4603 */ 4604 SINHALA, 4605 4606 /** 4607 * Unicode script "Thai". 4608 */ 4609 THAI, 4610 4611 /** 4612 * Unicode script "Lao". 4613 */ 4614 LAO, 4615 4616 /** 4617 * Unicode script "Tibetan". 4618 */ 4619 TIBETAN, 4620 4621 /** 4622 * Unicode script "Myanmar". 4623 */ 4624 MYANMAR, 4625 4626 /** 4627 * Unicode script "Georgian". 4628 */ 4629 GEORGIAN, 4630 4631 /** 4632 * Unicode script "Hangul". 4633 */ 4634 HANGUL, 4635 4636 /** 4637 * Unicode script "Ethiopic". 4638 */ 4639 ETHIOPIC, 4640 4641 /** 4642 * Unicode script "Cherokee". 4643 */ 4644 CHEROKEE, 4645 4646 /** 4647 * Unicode script "Canadian_Aboriginal". 4648 */ 4649 CANADIAN_ABORIGINAL, 4650 4651 /** 4652 * Unicode script "Ogham". 4653 */ 4654 OGHAM, 4655 4656 /** 4657 * Unicode script "Runic". 4658 */ 4659 RUNIC, 4660 4661 /** 4662 * Unicode script "Khmer". 4663 */ 4664 KHMER, 4665 4666 /** 4667 * Unicode script "Mongolian". 4668 */ 4669 MONGOLIAN, 4670 4671 /** 4672 * Unicode script "Hiragana". 4673 */ 4674 HIRAGANA, 4675 4676 /** 4677 * Unicode script "Katakana". 4678 */ 4679 KATAKANA, 4680 4681 /** 4682 * Unicode script "Bopomofo". 4683 */ 4684 BOPOMOFO, 4685 4686 /** 4687 * Unicode script "Han". 4688 */ 4689 HAN, 4690 4691 /** 4692 * Unicode script "Yi". 4693 */ 4694 YI, 4695 4696 /** 4697 * Unicode script "Old_Italic". 
4698 */ 4699 OLD_ITALIC, 4700 4701 /** 4702 * Unicode script "Gothic". 4703 */ 4704 GOTHIC, 4705 4706 /** 4707 * Unicode script "Deseret". 4708 */ 4709 DESERET, 4710 4711 /** 4712 * Unicode script "Inherited". 4713 */ 4714 INHERITED, 4715 4716 /** 4717 * Unicode script "Tagalog". 4718 */ 4719 TAGALOG, 4720 4721 /** 4722 * Unicode script "Hanunoo". 4723 */ 4724 HANUNOO, 4725 4726 /** 4727 * Unicode script "Buhid". 4728 */ 4729 BUHID, 4730 4731 /** 4732 * Unicode script "Tagbanwa". 4733 */ 4734 TAGBANWA, 4735 4736 /** 4737 * Unicode script "Limbu". 4738 */ 4739 LIMBU, 4740 4741 /** 4742 * Unicode script "Tai_Le". 4743 */ 4744 TAI_LE, 4745 4746 /** 4747 * Unicode script "Linear_B". 4748 */ 4749 LINEAR_B, 4750 4751 /** 4752 * Unicode script "Ugaritic". 4753 */ 4754 UGARITIC, 4755 4756 /** 4757 * Unicode script "Shavian". 4758 */ 4759 SHAVIAN, 4760 4761 /** 4762 * Unicode script "Osmanya". 4763 */ 4764 OSMANYA, 4765 4766 /** 4767 * Unicode script "Cypriot". 4768 */ 4769 CYPRIOT, 4770 4771 /** 4772 * Unicode script "Braille". 4773 */ 4774 BRAILLE, 4775 4776 /** 4777 * Unicode script "Buginese". 4778 */ 4779 BUGINESE, 4780 4781 /** 4782 * Unicode script "Coptic". 4783 */ 4784 COPTIC, 4785 4786 /** 4787 * Unicode script "New_Tai_Lue". 4788 */ 4789 NEW_TAI_LUE, 4790 4791 /** 4792 * Unicode script "Glagolitic". 4793 */ 4794 GLAGOLITIC, 4795 4796 /** 4797 * Unicode script "Tifinagh". 4798 */ 4799 TIFINAGH, 4800 4801 /** 4802 * Unicode script "Syloti_Nagri". 4803 */ 4804 SYLOTI_NAGRI, 4805 4806 /** 4807 * Unicode script "Old_Persian". 4808 */ 4809 OLD_PERSIAN, 4810 4811 /** 4812 * Unicode script "Kharoshthi". 4813 */ 4814 KHAROSHTHI, 4815 4816 /** 4817 * Unicode script "Balinese". 4818 */ 4819 BALINESE, 4820 4821 /** 4822 * Unicode script "Cuneiform". 4823 */ 4824 CUNEIFORM, 4825 4826 /** 4827 * Unicode script "Phoenician". 4828 */ 4829 PHOENICIAN, 4830 4831 /** 4832 * Unicode script "Phags_Pa". 4833 */ 4834 PHAGS_PA, 4835 4836 /** 4837 * Unicode script "Nko". 
4838 */ 4839 NKO, 4840 4841 /** 4842 * Unicode script "Sundanese". 4843 */ 4844 SUNDANESE, 4845 4846 /** 4847 * Unicode script "Batak". 4848 */ 4849 BATAK, 4850 4851 /** 4852 * Unicode script "Lepcha". 4853 */ 4854 LEPCHA, 4855 4856 /** 4857 * Unicode script "Ol_Chiki". 4858 */ 4859 OL_CHIKI, 4860 4861 /** 4862 * Unicode script "Vai". 4863 */ 4864 VAI, 4865 4866 /** 4867 * Unicode script "Saurashtra". 4868 */ 4869 SAURASHTRA, 4870 4871 /** 4872 * Unicode script "Kayah_Li". 4873 */ 4874 KAYAH_LI, 4875 4876 /** 4877 * Unicode script "Rejang". 4878 */ 4879 REJANG, 4880 4881 /** 4882 * Unicode script "Lycian". 4883 */ 4884 LYCIAN, 4885 4886 /** 4887 * Unicode script "Carian". 4888 */ 4889 CARIAN, 4890 4891 /** 4892 * Unicode script "Lydian". 4893 */ 4894 LYDIAN, 4895 4896 /** 4897 * Unicode script "Cham". 4898 */ 4899 CHAM, 4900 4901 /** 4902 * Unicode script "Tai_Tham". 4903 */ 4904 TAI_THAM, 4905 4906 /** 4907 * Unicode script "Tai_Viet". 4908 */ 4909 TAI_VIET, 4910 4911 /** 4912 * Unicode script "Avestan". 4913 */ 4914 AVESTAN, 4915 4916 /** 4917 * Unicode script "Egyptian_Hieroglyphs". 4918 */ 4919 EGYPTIAN_HIEROGLYPHS, 4920 4921 /** 4922 * Unicode script "Samaritan". 4923 */ 4924 SAMARITAN, 4925 4926 /** 4927 * Unicode script "Mandaic". 4928 */ 4929 MANDAIC, 4930 4931 /** 4932 * Unicode script "Lisu". 4933 */ 4934 LISU, 4935 4936 /** 4937 * Unicode script "Bamum". 4938 */ 4939 BAMUM, 4940 4941 /** 4942 * Unicode script "Javanese". 4943 */ 4944 JAVANESE, 4945 4946 /** 4947 * Unicode script "Meetei_Mayek". 4948 */ 4949 MEETEI_MAYEK, 4950 4951 /** 4952 * Unicode script "Imperial_Aramaic". 4953 */ 4954 IMPERIAL_ARAMAIC, 4955 4956 /** 4957 * Unicode script "Old_South_Arabian". 4958 */ 4959 OLD_SOUTH_ARABIAN, 4960 4961 /** 4962 * Unicode script "Inscriptional_Parthian". 4963 */ 4964 INSCRIPTIONAL_PARTHIAN, 4965 4966 /** 4967 * Unicode script "Inscriptional_Pahlavi". 4968 */ 4969 INSCRIPTIONAL_PAHLAVI, 4970 4971 /** 4972 * Unicode script "Old_Turkic". 
4973 */ 4974 OLD_TURKIC, 4975 4976 /** 4977 * Unicode script "Brahmi". 4978 */ 4979 BRAHMI, 4980 4981 /** 4982 * Unicode script "Kaithi". 4983 */ 4984 KAITHI, 4985 4986 /** 4987 * Unicode script "Meroitic Hieroglyphs". 4988 * @since 1.8 4989 */ 4990 MEROITIC_HIEROGLYPHS, 4991 4992 /** 4993 * Unicode script "Meroitic Cursive". 4994 * @since 1.8 4995 */ 4996 MEROITIC_CURSIVE, 4997 4998 /** 4999 * Unicode script "Sora Sompeng". 5000 * @since 1.8 5001 */ 5002 SORA_SOMPENG, 5003 5004 /** 5005 * Unicode script "Chakma". 5006 * @since 1.8 5007 */ 5008 CHAKMA, 5009 5010 /** 5011 * Unicode script "Sharada". 5012 * @since 1.8 5013 */ 5014 SHARADA, 5015 5016 /** 5017 * Unicode script "Takri". 5018 * @since 1.8 5019 */ 5020 TAKRI, 5021 5022 /** 5023 * Unicode script "Miao". 5024 * @since 1.8 5025 */ 5026 MIAO, 5027 5028 /** 5029 * Unicode script "Caucasian Albanian". 5030 * @since 9 5031 */ 5032 CAUCASIAN_ALBANIAN, 5033 5034 /** 5035 * Unicode script "Bassa Vah". 5036 * @since 9 5037 */ 5038 BASSA_VAH, 5039 5040 /** 5041 * Unicode script "Duployan". 5042 * @since 9 5043 */ 5044 DUPLOYAN, 5045 5046 /** 5047 * Unicode script "Elbasan". 5048 * @since 9 5049 */ 5050 ELBASAN, 5051 5052 /** 5053 * Unicode script "Grantha". 5054 * @since 9 5055 */ 5056 GRANTHA, 5057 5058 /** 5059 * Unicode script "Pahawh Hmong". 5060 * @since 9 5061 */ 5062 PAHAWH_HMONG, 5063 5064 /** 5065 * Unicode script "Khojki". 5066 * @since 9 5067 */ 5068 KHOJKI, 5069 5070 /** 5071 * Unicode script "Linear A". 5072 * @since 9 5073 */ 5074 LINEAR_A, 5075 5076 /** 5077 * Unicode script "Mahajani". 5078 * @since 9 5079 */ 5080 MAHAJANI, 5081 5082 /** 5083 * Unicode script "Manichaean". 5084 * @since 9 5085 */ 5086 MANICHAEAN, 5087 5088 /** 5089 * Unicode script "Mende Kikakui". 5090 * @since 9 5091 */ 5092 MENDE_KIKAKUI, 5093 5094 /** 5095 * Unicode script "Modi". 5096 * @since 9 5097 */ 5098 MODI, 5099 5100 /** 5101 * Unicode script "Mro". 
5102 * @since 9 5103 */ 5104 MRO, 5105 5106 /** 5107 * Unicode script "Old North Arabian". 5108 * @since 9 5109 */ 5110 OLD_NORTH_ARABIAN, 5111 5112 /** 5113 * Unicode script "Nabataean". 5114 * @since 9 5115 */ 5116 NABATAEAN, 5117 5118 /** 5119 * Unicode script "Palmyrene". 5120 * @since 9 5121 */ 5122 PALMYRENE, 5123 5124 /** 5125 * Unicode script "Pau Cin Hau". 5126 * @since 9 5127 */ 5128 PAU_CIN_HAU, 5129 5130 /** 5131 * Unicode script "Old Permic". 5132 * @since 9 5133 */ 5134 OLD_PERMIC, 5135 5136 /** 5137 * Unicode script "Psalter Pahlavi". 5138 * @since 9 5139 */ 5140 PSALTER_PAHLAVI, 5141 5142 /** 5143 * Unicode script "Siddham". 5144 * @since 9 5145 */ 5146 SIDDHAM, 5147 5148 /** 5149 * Unicode script "Khudawadi". 5150 * @since 9 5151 */ 5152 KHUDAWADI, 5153 5154 /** 5155 * Unicode script "Tirhuta". 5156 * @since 9 5157 */ 5158 TIRHUTA, 5159 5160 /** 5161 * Unicode script "Warang Citi". 5162 * @since 9 5163 */ 5164 WARANG_CITI, 5165 5166 /** 5167 * Unicode script "Ahom". 5168 * @since 9 5169 */ 5170 AHOM, 5171 5172 /** 5173 * Unicode script "Anatolian Hieroglyphs". 5174 * @since 9 5175 */ 5176 ANATOLIAN_HIEROGLYPHS, 5177 5178 /** 5179 * Unicode script "Hatran". 5180 * @since 9 5181 */ 5182 HATRAN, 5183 5184 /** 5185 * Unicode script "Multani". 5186 * @since 9 5187 */ 5188 MULTANI, 5189 5190 /** 5191 * Unicode script "Old Hungarian". 5192 * @since 9 5193 */ 5194 OLD_HUNGARIAN, 5195 5196 /** 5197 * Unicode script "SignWriting". 5198 * @since 9 5199 */ 5200 SIGNWRITING, 5201 5202 /** 5203 * Unicode script "Adlam". 5204 * @since 11 5205 */ 5206 ADLAM, 5207 5208 /** 5209 * Unicode script "Bhaiksuki". 5210 * @since 11 5211 */ 5212 BHAIKSUKI, 5213 5214 /** 5215 * Unicode script "Marchen". 5216 * @since 11 5217 */ 5218 MARCHEN, 5219 5220 /** 5221 * Unicode script "Newa". 5222 * @since 11 5223 */ 5224 NEWA, 5225 5226 /** 5227 * Unicode script "Osage". 5228 * @since 11 5229 */ 5230 OSAGE, 5231 5232 /** 5233 * Unicode script "Tangut". 
5234 * @since 11 5235 */ 5236 TANGUT, 5237 5238 /** 5239 * Unicode script "Masaram Gondi". 5240 * @since 11 5241 */ 5242 MASARAM_GONDI, 5243 5244 /** 5245 * Unicode script "Nushu". 5246 * @since 11 5247 */ 5248 NUSHU, 5249 5250 /** 5251 * Unicode script "Soyombo". 5252 * @since 11 5253 */ 5254 SOYOMBO, 5255 5256 /** 5257 * Unicode script "Zanabazar Square". 5258 * @since 11 5259 */ 5260 ZANABAZAR_SQUARE, 5261 5262 /** 5263 * Unicode script "Hanifi Rohingya". 5264 * @since 12 5265 */ 5266 HANIFI_ROHINGYA, 5267 5268 /** 5269 * Unicode script "Old Sogdian". 5270 * @since 12 5271 */ 5272 OLD_SOGDIAN, 5273 5274 /** 5275 * Unicode script "Sogdian". 5276 * @since 12 5277 */ 5278 SOGDIAN, 5279 5280 /** 5281 * Unicode script "Dogra". 5282 * @since 12 5283 */ 5284 DOGRA, 5285 5286 /** 5287 * Unicode script "Gunjala Gondi". 5288 * @since 12 5289 */ 5290 GUNJALA_GONDI, 5291 5292 /** 5293 * Unicode script "Makasar". 5294 * @since 12 5295 */ 5296 MAKASAR, 5297 5298 /** 5299 * Unicode script "Medefaidrin". 5300 * @since 12 5301 */ 5302 MEDEFAIDRIN, 5303 5304 /** 5305 * Unicode script "Elymaic". 5306 * @since 13 5307 */ 5308 ELYMAIC, 5309 5310 /** 5311 * Unicode script "Nandinagari". 5312 * @since 13 5313 */ 5314 NANDINAGARI, 5315 5316 /** 5317 * Unicode script "Nyiakeng Puachue Hmong". 5318 * @since 13 5319 */ 5320 NYIAKENG_PUACHUE_HMONG, 5321 5322 /** 5323 * Unicode script "Wancho". 5324 * @since 13 5325 */ 5326 WANCHO, 5327 5328 /** 5329 * Unicode script "Yezidi". 5330 * @since 15 5331 */ 5332 YEZIDI, 5333 5334 /** 5335 * Unicode script "Chorasmian". 5336 * @since 15 5337 */ 5338 CHORASMIAN, 5339 5340 /** 5341 * Unicode script "Dives Akuru". 5342 * @since 15 5343 */ 5344 DIVES_AKURU, 5345 5346 /** 5347 * Unicode script "Khitan Small Script". 5348 * @since 15 5349 */ 5350 KHITAN_SMALL_SCRIPT, 5351 5352 /** 5353 * Unicode script "Vithkuqi". 5354 * @since 19 5355 */ 5356 VITHKUQI, 5357 5358 /** 5359 * Unicode script "Old Uyghur". 
5360 * @since 19 5361 */ 5362 OLD_UYGHUR, 5363 5364 /** 5365 * Unicode script "Cypro Minoan". 5366 * @since 19 5367 */ 5368 CYPRO_MINOAN, 5369 5370 /** 5371 * Unicode script "Tangsa". 5372 * @since 19 5373 */ 5374 TANGSA, 5375 5376 /** 5377 * Unicode script "Toto". 5378 * @since 19 5379 */ 5380 TOTO, 5381 5382 /** 5383 * Unicode script "Kawi". 5384 * @since 20 5385 */ 5386 KAWI, 5387 5388 /** 5389 * Unicode script "Nag Mundari". 5390 * @since 20 5391 */ 5392 NAG_MUNDARI, 5393 5394 /** 5395 * Unicode script "Unknown". 5396 */ 5397 UNKNOWN; // must be the last enum constant for calculating the size of "aliases" hash map. 5398 5399 private static final int[] scriptStarts = { 5400 0x0000, // 0000..0040; COMMON 5401 0x0041, // 0041..005A; LATIN 5402 0x005B, // 005B..0060; COMMON 5403 0x0061, // 0061..007A; LATIN 5404 0x007B, // 007B..00A9; COMMON 5405 0x00AA, // 00AA ; LATIN 5406 0x00AB, // 00AB..00B9; COMMON 5407 0x00BA, // 00BA ; LATIN 5408 0x00BB, // 00BB..00BF; COMMON 5409 0x00C0, // 00C0..00D6; LATIN 5410 0x00D7, // 00D7 ; COMMON 5411 0x00D8, // 00D8..00F6; LATIN 5412 0x00F7, // 00F7 ; COMMON 5413 0x00F8, // 00F8..02B8; LATIN 5414 0x02B9, // 02B9..02DF; COMMON 5415 0x02E0, // 02E0..02E4; LATIN 5416 0x02E5, // 02E5..02E9; COMMON 5417 0x02EA, // 02EA..02EB; BOPOMOFO 5418 0x02EC, // 02EC..02FF; COMMON 5419 0x0300, // 0300..036F; INHERITED 5420 0x0370, // 0370..0373; GREEK 5421 0x0374, // 0374 ; COMMON 5422 0x0375, // 0375..0377; GREEK 5423 0x0378, // 0378..0379; UNKNOWN 5424 0x037A, // 037A..037D; GREEK 5425 0x037E, // 037E ; COMMON 5426 0x037F, // 037F ; GREEK 5427 0x0380, // 0380..0383; UNKNOWN 5428 0x0384, // 0384 ; GREEK 5429 0x0385, // 0385 ; COMMON 5430 0x0386, // 0386 ; GREEK 5431 0x0387, // 0387 ; COMMON 5432 0x0388, // 0388..038A; GREEK 5433 0x038B, // 038B ; UNKNOWN 5434 0x038C, // 038C ; GREEK 5435 0x038D, // 038D ; UNKNOWN 5436 0x038E, // 038E..03A1; GREEK 5437 0x03A2, // 03A2 ; UNKNOWN 5438 0x03A3, // 03A3..03E1; GREEK 5439 0x03E2, // 03E2..03EF; COPTIC 
5440 0x03F0, // 03F0..03FF; GREEK 5441 0x0400, // 0400..0484; CYRILLIC 5442 0x0485, // 0485..0486; INHERITED 5443 0x0487, // 0487..052F; CYRILLIC 5444 0x0530, // 0530 ; UNKNOWN 5445 0x0531, // 0531..0556; ARMENIAN 5446 0x0557, // 0557..0558; UNKNOWN 5447 0x0559, // 0559..058A; ARMENIAN 5448 0x058B, // 058B..058C; UNKNOWN 5449 0x058D, // 058D..058F; ARMENIAN 5450 0x0590, // 0590 ; UNKNOWN 5451 0x0591, // 0591..05C7; HEBREW 5452 0x05C8, // 05C8..05CF; UNKNOWN 5453 0x05D0, // 05D0..05EA; HEBREW 5454 0x05EB, // 05EB..05EE; UNKNOWN 5455 0x05EF, // 05EF..05F4; HEBREW 5456 0x05F5, // 05F5..05FF; UNKNOWN 5457 0x0600, // 0600..0604; ARABIC 5458 0x0605, // 0605 ; COMMON 5459 0x0606, // 0606..060B; ARABIC 5460 0x060C, // 060C ; COMMON 5461 0x060D, // 060D..061A; ARABIC 5462 0x061B, // 061B ; COMMON 5463 0x061C, // 061C..061E; ARABIC 5464 0x061F, // 061F ; COMMON 5465 0x0620, // 0620..063F; ARABIC 5466 0x0640, // 0640 ; COMMON 5467 0x0641, // 0641..064A; ARABIC 5468 0x064B, // 064B..0655; INHERITED 5469 0x0656, // 0656..066F; ARABIC 5470 0x0670, // 0670 ; INHERITED 5471 0x0671, // 0671..06DC; ARABIC 5472 0x06DD, // 06DD ; COMMON 5473 0x06DE, // 06DE..06FF; ARABIC 5474 0x0700, // 0700..070D; SYRIAC 5475 0x070E, // 070E ; UNKNOWN 5476 0x070F, // 070F..074A; SYRIAC 5477 0x074B, // 074B..074C; UNKNOWN 5478 0x074D, // 074D..074F; SYRIAC 5479 0x0750, // 0750..077F; ARABIC 5480 0x0780, // 0780..07B1; THAANA 5481 0x07B2, // 07B2..07BF; UNKNOWN 5482 0x07C0, // 07C0..07FA; NKO 5483 0x07FB, // 07FB..07FC; UNKNOWN 5484 0x07FD, // 07FD..07FF; NKO 5485 0x0800, // 0800..082D; SAMARITAN 5486 0x082E, // 082E..082F; UNKNOWN 5487 0x0830, // 0830..083E; SAMARITAN 5488 0x083F, // 083F ; UNKNOWN 5489 0x0840, // 0840..085B; MANDAIC 5490 0x085C, // 085C..085D; UNKNOWN 5491 0x085E, // 085E ; MANDAIC 5492 0x085F, // 085F ; UNKNOWN 5493 0x0860, // 0860..086A; SYRIAC 5494 0x086B, // 086B..086F; UNKNOWN 5495 0x0870, // 0870..088E; ARABIC 5496 0x088F, // 088F ; UNKNOWN 5497 0x0890, // 0890..0891; ARABIC 
5498 0x0892, // 0892..0897; UNKNOWN 5499 0x0898, // 0898..08E1; ARABIC 5500 0x08E2, // 08E2 ; COMMON 5501 0x08E3, // 08E3..08FF; ARABIC 5502 0x0900, // 0900..0950; DEVANAGARI 5503 0x0951, // 0951..0954; INHERITED 5504 0x0955, // 0955..0963; DEVANAGARI 5505 0x0964, // 0964..0965; COMMON 5506 0x0966, // 0966..097F; DEVANAGARI 5507 0x0980, // 0980..0983; BENGALI 5508 0x0984, // 0984 ; UNKNOWN 5509 0x0985, // 0985..098C; BENGALI 5510 0x098D, // 098D..098E; UNKNOWN 5511 0x098F, // 098F..0990; BENGALI 5512 0x0991, // 0991..0992; UNKNOWN 5513 0x0993, // 0993..09A8; BENGALI 5514 0x09A9, // 09A9 ; UNKNOWN 5515 0x09AA, // 09AA..09B0; BENGALI 5516 0x09B1, // 09B1 ; UNKNOWN 5517 0x09B2, // 09B2 ; BENGALI 5518 0x09B3, // 09B3..09B5; UNKNOWN 5519 0x09B6, // 09B6..09B9; BENGALI 5520 0x09BA, // 09BA..09BB; UNKNOWN 5521 0x09BC, // 09BC..09C4; BENGALI 5522 0x09C5, // 09C5..09C6; UNKNOWN 5523 0x09C7, // 09C7..09C8; BENGALI 5524 0x09C9, // 09C9..09CA; UNKNOWN 5525 0x09CB, // 09CB..09CE; BENGALI 5526 0x09CF, // 09CF..09D6; UNKNOWN 5527 0x09D7, // 09D7 ; BENGALI 5528 0x09D8, // 09D8..09DB; UNKNOWN 5529 0x09DC, // 09DC..09DD; BENGALI 5530 0x09DE, // 09DE ; UNKNOWN 5531 0x09DF, // 09DF..09E3; BENGALI 5532 0x09E4, // 09E4..09E5; UNKNOWN 5533 0x09E6, // 09E6..09FE; BENGALI 5534 0x09FF, // 09FF..0A00; UNKNOWN 5535 0x0A01, // 0A01..0A03; GURMUKHI 5536 0x0A04, // 0A04 ; UNKNOWN 5537 0x0A05, // 0A05..0A0A; GURMUKHI 5538 0x0A0B, // 0A0B..0A0E; UNKNOWN 5539 0x0A0F, // 0A0F..0A10; GURMUKHI 5540 0x0A11, // 0A11..0A12; UNKNOWN 5541 0x0A13, // 0A13..0A28; GURMUKHI 5542 0x0A29, // 0A29 ; UNKNOWN 5543 0x0A2A, // 0A2A..0A30; GURMUKHI 5544 0x0A31, // 0A31 ; UNKNOWN 5545 0x0A32, // 0A32..0A33; GURMUKHI 5546 0x0A34, // 0A34 ; UNKNOWN 5547 0x0A35, // 0A35..0A36; GURMUKHI 5548 0x0A37, // 0A37 ; UNKNOWN 5549 0x0A38, // 0A38..0A39; GURMUKHI 5550 0x0A3A, // 0A3A..0A3B; UNKNOWN 5551 0x0A3C, // 0A3C ; GURMUKHI 5552 0x0A3D, // 0A3D ; UNKNOWN 5553 0x0A3E, // 0A3E..0A42; GURMUKHI 5554 0x0A43, // 0A43..0A46; UNKNOWN 
5555 0x0A47, // 0A47..0A48; GURMUKHI 5556 0x0A49, // 0A49..0A4A; UNKNOWN 5557 0x0A4B, // 0A4B..0A4D; GURMUKHI 5558 0x0A4E, // 0A4E..0A50; UNKNOWN 5559 0x0A51, // 0A51 ; GURMUKHI 5560 0x0A52, // 0A52..0A58; UNKNOWN 5561 0x0A59, // 0A59..0A5C; GURMUKHI 5562 0x0A5D, // 0A5D ; UNKNOWN 5563 0x0A5E, // 0A5E ; GURMUKHI 5564 0x0A5F, // 0A5F..0A65; UNKNOWN 5565 0x0A66, // 0A66..0A76; GURMUKHI 5566 0x0A77, // 0A77..0A80; UNKNOWN 5567 0x0A81, // 0A81..0A83; GUJARATI 5568 0x0A84, // 0A84 ; UNKNOWN 5569 0x0A85, // 0A85..0A8D; GUJARATI 5570 0x0A8E, // 0A8E ; UNKNOWN 5571 0x0A8F, // 0A8F..0A91; GUJARATI 5572 0x0A92, // 0A92 ; UNKNOWN 5573 0x0A93, // 0A93..0AA8; GUJARATI 5574 0x0AA9, // 0AA9 ; UNKNOWN 5575 0x0AAA, // 0AAA..0AB0; GUJARATI 5576 0x0AB1, // 0AB1 ; UNKNOWN 5577 0x0AB2, // 0AB2..0AB3; GUJARATI 5578 0x0AB4, // 0AB4 ; UNKNOWN 5579 0x0AB5, // 0AB5..0AB9; GUJARATI 5580 0x0ABA, // 0ABA..0ABB; UNKNOWN 5581 0x0ABC, // 0ABC..0AC5; GUJARATI 5582 0x0AC6, // 0AC6 ; UNKNOWN 5583 0x0AC7, // 0AC7..0AC9; GUJARATI 5584 0x0ACA, // 0ACA ; UNKNOWN 5585 0x0ACB, // 0ACB..0ACD; GUJARATI 5586 0x0ACE, // 0ACE..0ACF; UNKNOWN 5587 0x0AD0, // 0AD0 ; GUJARATI 5588 0x0AD1, // 0AD1..0ADF; UNKNOWN 5589 0x0AE0, // 0AE0..0AE3; GUJARATI 5590 0x0AE4, // 0AE4..0AE5; UNKNOWN 5591 0x0AE6, // 0AE6..0AF1; GUJARATI 5592 0x0AF2, // 0AF2..0AF8; UNKNOWN 5593 0x0AF9, // 0AF9..0AFF; GUJARATI 5594 0x0B00, // 0B00 ; UNKNOWN 5595 0x0B01, // 0B01..0B03; ORIYA 5596 0x0B04, // 0B04 ; UNKNOWN 5597 0x0B05, // 0B05..0B0C; ORIYA 5598 0x0B0D, // 0B0D..0B0E; UNKNOWN 5599 0x0B0F, // 0B0F..0B10; ORIYA 5600 0x0B11, // 0B11..0B12; UNKNOWN 5601 0x0B13, // 0B13..0B28; ORIYA 5602 0x0B29, // 0B29 ; UNKNOWN 5603 0x0B2A, // 0B2A..0B30; ORIYA 5604 0x0B31, // 0B31 ; UNKNOWN 5605 0x0B32, // 0B32..0B33; ORIYA 5606 0x0B34, // 0B34 ; UNKNOWN 5607 0x0B35, // 0B35..0B39; ORIYA 5608 0x0B3A, // 0B3A..0B3B; UNKNOWN 5609 0x0B3C, // 0B3C..0B44; ORIYA 5610 0x0B45, // 0B45..0B46; UNKNOWN 5611 0x0B47, // 0B47..0B48; ORIYA 5612 0x0B49, // 0B49..0B4A; 
UNKNOWN 5613 0x0B4B, // 0B4B..0B4D; ORIYA 5614 0x0B4E, // 0B4E..0B54; UNKNOWN 5615 0x0B55, // 0B55..0B57; ORIYA 5616 0x0B58, // 0B58..0B5B; UNKNOWN 5617 0x0B5C, // 0B5C..0B5D; ORIYA 5618 0x0B5E, // 0B5E ; UNKNOWN 5619 0x0B5F, // 0B5F..0B63; ORIYA 5620 0x0B64, // 0B64..0B65; UNKNOWN 5621 0x0B66, // 0B66..0B77; ORIYA 5622 0x0B78, // 0B78..0B81; UNKNOWN 5623 0x0B82, // 0B82..0B83; TAMIL 5624 0x0B84, // 0B84 ; UNKNOWN 5625 0x0B85, // 0B85..0B8A; TAMIL 5626 0x0B8B, // 0B8B..0B8D; UNKNOWN 5627 0x0B8E, // 0B8E..0B90; TAMIL 5628 0x0B91, // 0B91 ; UNKNOWN 5629 0x0B92, // 0B92..0B95; TAMIL 5630 0x0B96, // 0B96..0B98; UNKNOWN 5631 0x0B99, // 0B99..0B9A; TAMIL 5632 0x0B9B, // 0B9B ; UNKNOWN 5633 0x0B9C, // 0B9C ; TAMIL 5634 0x0B9D, // 0B9D ; UNKNOWN 5635 0x0B9E, // 0B9E..0B9F; TAMIL 5636 0x0BA0, // 0BA0..0BA2; UNKNOWN 5637 0x0BA3, // 0BA3..0BA4; TAMIL 5638 0x0BA5, // 0BA5..0BA7; UNKNOWN 5639 0x0BA8, // 0BA8..0BAA; TAMIL 5640 0x0BAB, // 0BAB..0BAD; UNKNOWN 5641 0x0BAE, // 0BAE..0BB9; TAMIL 5642 0x0BBA, // 0BBA..0BBD; UNKNOWN 5643 0x0BBE, // 0BBE..0BC2; TAMIL 5644 0x0BC3, // 0BC3..0BC5; UNKNOWN 5645 0x0BC6, // 0BC6..0BC8; TAMIL 5646 0x0BC9, // 0BC9 ; UNKNOWN 5647 0x0BCA, // 0BCA..0BCD; TAMIL 5648 0x0BCE, // 0BCE..0BCF; UNKNOWN 5649 0x0BD0, // 0BD0 ; TAMIL 5650 0x0BD1, // 0BD1..0BD6; UNKNOWN 5651 0x0BD7, // 0BD7 ; TAMIL 5652 0x0BD8, // 0BD8..0BE5; UNKNOWN 5653 0x0BE6, // 0BE6..0BFA; TAMIL 5654 0x0BFB, // 0BFB..0BFF; UNKNOWN 5655 0x0C00, // 0C00..0C0C; TELUGU 5656 0x0C0D, // 0C0D ; UNKNOWN 5657 0x0C0E, // 0C0E..0C10; TELUGU 5658 0x0C11, // 0C11 ; UNKNOWN 5659 0x0C12, // 0C12..0C28; TELUGU 5660 0x0C29, // 0C29 ; UNKNOWN 5661 0x0C2A, // 0C2A..0C39; TELUGU 5662 0x0C3A, // 0C3A..0C3B; UNKNOWN 5663 0x0C3C, // 0C3C..0C44; TELUGU 5664 0x0C45, // 0C45 ; UNKNOWN 5665 0x0C46, // 0C46..0C48; TELUGU 5666 0x0C49, // 0C49 ; UNKNOWN 5667 0x0C4A, // 0C4A..0C4D; TELUGU 5668 0x0C4E, // 0C4E..0C54; UNKNOWN 5669 0x0C55, // 0C55..0C56; TELUGU 5670 0x0C57, // 0C57 ; UNKNOWN 5671 0x0C58, // 0C58..0C5A; 
TELUGU 5672 0x0C5B, // 0C5B..0C5C; UNKNOWN 5673 0x0C5D, // 0C5D ; TELUGU 5674 0x0C5E, // 0C5E..0C5F; UNKNOWN 5675 0x0C60, // 0C60..0C63; TELUGU 5676 0x0C64, // 0C64..0C65; UNKNOWN 5677 0x0C66, // 0C66..0C6F; TELUGU 5678 0x0C70, // 0C70..0C76; UNKNOWN 5679 0x0C77, // 0C77..0C7F; TELUGU 5680 0x0C80, // 0C80..0C8C; KANNADA 5681 0x0C8D, // 0C8D ; UNKNOWN 5682 0x0C8E, // 0C8E..0C90; KANNADA 5683 0x0C91, // 0C91 ; UNKNOWN 5684 0x0C92, // 0C92..0CA8; KANNADA 5685 0x0CA9, // 0CA9 ; UNKNOWN 5686 0x0CAA, // 0CAA..0CB3; KANNADA 5687 0x0CB4, // 0CB4 ; UNKNOWN 5688 0x0CB5, // 0CB5..0CB9; KANNADA 5689 0x0CBA, // 0CBA..0CBB; UNKNOWN 5690 0x0CBC, // 0CBC..0CC4; KANNADA 5691 0x0CC5, // 0CC5 ; UNKNOWN 5692 0x0CC6, // 0CC6..0CC8; KANNADA 5693 0x0CC9, // 0CC9 ; UNKNOWN 5694 0x0CCA, // 0CCA..0CCD; KANNADA 5695 0x0CCE, // 0CCE..0CD4; UNKNOWN 5696 0x0CD5, // 0CD5..0CD6; KANNADA 5697 0x0CD7, // 0CD7..0CDC; UNKNOWN 5698 0x0CDD, // 0CDD..0CDE; KANNADA 5699 0x0CDF, // 0CDF ; UNKNOWN 5700 0x0CE0, // 0CE0..0CE3; KANNADA 5701 0x0CE4, // 0CE4..0CE5; UNKNOWN 5702 0x0CE6, // 0CE6..0CEF; KANNADA 5703 0x0CF0, // 0CF0 ; UNKNOWN 5704 0x0CF1, // 0CF1..0CF3; KANNADA 5705 0x0CF4, // 0CF4..0CFF; UNKNOWN 5706 0x0D00, // 0D00..0D0C; MALAYALAM 5707 0x0D0D, // 0D0D ; UNKNOWN 5708 0x0D0E, // 0D0E..0D10; MALAYALAM 5709 0x0D11, // 0D11 ; UNKNOWN 5710 0x0D12, // 0D12..0D44; MALAYALAM 5711 0x0D45, // 0D45 ; UNKNOWN 5712 0x0D46, // 0D46..0D48; MALAYALAM 5713 0x0D49, // 0D49 ; UNKNOWN 5714 0x0D4A, // 0D4A..0D4F; MALAYALAM 5715 0x0D50, // 0D50..0D53; UNKNOWN 5716 0x0D54, // 0D54..0D63; MALAYALAM 5717 0x0D64, // 0D64..0D65; UNKNOWN 5718 0x0D66, // 0D66..0D7F; MALAYALAM 5719 0x0D80, // 0D80 ; UNKNOWN 5720 0x0D81, // 0D81..0D83; SINHALA 5721 0x0D84, // 0D84 ; UNKNOWN 5722 0x0D85, // 0D85..0D96; SINHALA 5723 0x0D97, // 0D97..0D99; UNKNOWN 5724 0x0D9A, // 0D9A..0DB1; SINHALA 5725 0x0DB2, // 0DB2 ; UNKNOWN 5726 0x0DB3, // 0DB3..0DBB; SINHALA 5727 0x0DBC, // 0DBC ; UNKNOWN 5728 0x0DBD, // 0DBD ; SINHALA 5729 0x0DBE, // 
0DBE..0DBF; UNKNOWN 5730 0x0DC0, // 0DC0..0DC6; SINHALA 5731 0x0DC7, // 0DC7..0DC9; UNKNOWN 5732 0x0DCA, // 0DCA ; SINHALA 5733 0x0DCB, // 0DCB..0DCE; UNKNOWN 5734 0x0DCF, // 0DCF..0DD4; SINHALA 5735 0x0DD5, // 0DD5 ; UNKNOWN 5736 0x0DD6, // 0DD6 ; SINHALA 5737 0x0DD7, // 0DD7 ; UNKNOWN 5738 0x0DD8, // 0DD8..0DDF; SINHALA 5739 0x0DE0, // 0DE0..0DE5; UNKNOWN 5740 0x0DE6, // 0DE6..0DEF; SINHALA 5741 0x0DF0, // 0DF0..0DF1; UNKNOWN 5742 0x0DF2, // 0DF2..0DF4; SINHALA 5743 0x0DF5, // 0DF5..0E00; UNKNOWN 5744 0x0E01, // 0E01..0E3A; THAI 5745 0x0E3B, // 0E3B..0E3E; UNKNOWN 5746 0x0E3F, // 0E3F ; COMMON 5747 0x0E40, // 0E40..0E5B; THAI 5748 0x0E5C, // 0E5C..0E80; UNKNOWN 5749 0x0E81, // 0E81..0E82; LAO 5750 0x0E83, // 0E83 ; UNKNOWN 5751 0x0E84, // 0E84 ; LAO 5752 0x0E85, // 0E85 ; UNKNOWN 5753 0x0E86, // 0E86..0E8A; LAO 5754 0x0E8B, // 0E8B ; UNKNOWN 5755 0x0E8C, // 0E8C..0EA3; LAO 5756 0x0EA4, // 0EA4 ; UNKNOWN 5757 0x0EA5, // 0EA5 ; LAO 5758 0x0EA6, // 0EA6 ; UNKNOWN 5759 0x0EA7, // 0EA7..0EBD; LAO 5760 0x0EBE, // 0EBE..0EBF; UNKNOWN 5761 0x0EC0, // 0EC0..0EC4; LAO 5762 0x0EC5, // 0EC5 ; UNKNOWN 5763 0x0EC6, // 0EC6 ; LAO 5764 0x0EC7, // 0EC7 ; UNKNOWN 5765 0x0EC8, // 0EC8..0ECE; LAO 5766 0x0ECF, // 0ECF ; UNKNOWN 5767 0x0ED0, // 0ED0..0ED9; LAO 5768 0x0EDA, // 0EDA..0EDB; UNKNOWN 5769 0x0EDC, // 0EDC..0EDF; LAO 5770 0x0EE0, // 0EE0..0EFF; UNKNOWN 5771 0x0F00, // 0F00..0F47; TIBETAN 5772 0x0F48, // 0F48 ; UNKNOWN 5773 0x0F49, // 0F49..0F6C; TIBETAN 5774 0x0F6D, // 0F6D..0F70; UNKNOWN 5775 0x0F71, // 0F71..0F97; TIBETAN 5776 0x0F98, // 0F98 ; UNKNOWN 5777 0x0F99, // 0F99..0FBC; TIBETAN 5778 0x0FBD, // 0FBD ; UNKNOWN 5779 0x0FBE, // 0FBE..0FCC; TIBETAN 5780 0x0FCD, // 0FCD ; UNKNOWN 5781 0x0FCE, // 0FCE..0FD4; TIBETAN 5782 0x0FD5, // 0FD5..0FD8; COMMON 5783 0x0FD9, // 0FD9..0FDA; TIBETAN 5784 0x0FDB, // 0FDB..0FFF; UNKNOWN 5785 0x1000, // 1000..109F; MYANMAR 5786 0x10A0, // 10A0..10C5; GEORGIAN 5787 0x10C6, // 10C6 ; UNKNOWN 5788 0x10C7, // 10C7 ; GEORGIAN 5789 0x10C8, // 
10C8..10CC; UNKNOWN 5790 0x10CD, // 10CD ; GEORGIAN 5791 0x10CE, // 10CE..10CF; UNKNOWN 5792 0x10D0, // 10D0..10FA; GEORGIAN 5793 0x10FB, // 10FB ; COMMON 5794 0x10FC, // 10FC..10FF; GEORGIAN 5795 0x1100, // 1100..11FF; HANGUL 5796 0x1200, // 1200..1248; ETHIOPIC 5797 0x1249, // 1249 ; UNKNOWN 5798 0x124A, // 124A..124D; ETHIOPIC 5799 0x124E, // 124E..124F; UNKNOWN 5800 0x1250, // 1250..1256; ETHIOPIC 5801 0x1257, // 1257 ; UNKNOWN 5802 0x1258, // 1258 ; ETHIOPIC 5803 0x1259, // 1259 ; UNKNOWN 5804 0x125A, // 125A..125D; ETHIOPIC 5805 0x125E, // 125E..125F; UNKNOWN 5806 0x1260, // 1260..1288; ETHIOPIC 5807 0x1289, // 1289 ; UNKNOWN 5808 0x128A, // 128A..128D; ETHIOPIC 5809 0x128E, // 128E..128F; UNKNOWN 5810 0x1290, // 1290..12B0; ETHIOPIC 5811 0x12B1, // 12B1 ; UNKNOWN 5812 0x12B2, // 12B2..12B5; ETHIOPIC 5813 0x12B6, // 12B6..12B7; UNKNOWN 5814 0x12B8, // 12B8..12BE; ETHIOPIC 5815 0x12BF, // 12BF ; UNKNOWN 5816 0x12C0, // 12C0 ; ETHIOPIC 5817 0x12C1, // 12C1 ; UNKNOWN 5818 0x12C2, // 12C2..12C5; ETHIOPIC 5819 0x12C6, // 12C6..12C7; UNKNOWN 5820 0x12C8, // 12C8..12D6; ETHIOPIC 5821 0x12D7, // 12D7 ; UNKNOWN 5822 0x12D8, // 12D8..1310; ETHIOPIC 5823 0x1311, // 1311 ; UNKNOWN 5824 0x1312, // 1312..1315; ETHIOPIC 5825 0x1316, // 1316..1317; UNKNOWN 5826 0x1318, // 1318..135A; ETHIOPIC 5827 0x135B, // 135B..135C; UNKNOWN 5828 0x135D, // 135D..137C; ETHIOPIC 5829 0x137D, // 137D..137F; UNKNOWN 5830 0x1380, // 1380..1399; ETHIOPIC 5831 0x139A, // 139A..139F; UNKNOWN 5832 0x13A0, // 13A0..13F5; CHEROKEE 5833 0x13F6, // 13F6..13F7; UNKNOWN 5834 0x13F8, // 13F8..13FD; CHEROKEE 5835 0x13FE, // 13FE..13FF; UNKNOWN 5836 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 5837 0x1680, // 1680..169C; OGHAM 5838 0x169D, // 169D..169F; UNKNOWN 5839 0x16A0, // 16A0..16EA; RUNIC 5840 0x16EB, // 16EB..16ED; COMMON 5841 0x16EE, // 16EE..16F8; RUNIC 5842 0x16F9, // 16F9..16FF; UNKNOWN 5843 0x1700, // 1700..1715; TAGALOG 5844 0x1716, // 1716..171E; UNKNOWN 5845 0x171F, // 171F ; TAGALOG 5846 
0x1720, // 1720..1734; HANUNOO 5847 0x1735, // 1735..1736; COMMON 5848 0x1737, // 1737..173F; UNKNOWN 5849 0x1740, // 1740..1753; BUHID 5850 0x1754, // 1754..175F; UNKNOWN 5851 0x1760, // 1760..176C; TAGBANWA 5852 0x176D, // 176D ; UNKNOWN 5853 0x176E, // 176E..1770; TAGBANWA 5854 0x1771, // 1771 ; UNKNOWN 5855 0x1772, // 1772..1773; TAGBANWA 5856 0x1774, // 1774..177F; UNKNOWN 5857 0x1780, // 1780..17DD; KHMER 5858 0x17DE, // 17DE..17DF; UNKNOWN 5859 0x17E0, // 17E0..17E9; KHMER 5860 0x17EA, // 17EA..17EF; UNKNOWN 5861 0x17F0, // 17F0..17F9; KHMER 5862 0x17FA, // 17FA..17FF; UNKNOWN 5863 0x1800, // 1800..1801; MONGOLIAN 5864 0x1802, // 1802..1803; COMMON 5865 0x1804, // 1804 ; MONGOLIAN 5866 0x1805, // 1805 ; COMMON 5867 0x1806, // 1806..1819; MONGOLIAN 5868 0x181A, // 181A..181F; UNKNOWN 5869 0x1820, // 1820..1878; MONGOLIAN 5870 0x1879, // 1879..187F; UNKNOWN 5871 0x1880, // 1880..18AA; MONGOLIAN 5872 0x18AB, // 18AB..18AF; UNKNOWN 5873 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL 5874 0x18F6, // 18F6..18FF; UNKNOWN 5875 0x1900, // 1900..191E; LIMBU 5876 0x191F, // 191F ; UNKNOWN 5877 0x1920, // 1920..192B; LIMBU 5878 0x192C, // 192C..192F; UNKNOWN 5879 0x1930, // 1930..193B; LIMBU 5880 0x193C, // 193C..193F; UNKNOWN 5881 0x1940, // 1940 ; LIMBU 5882 0x1941, // 1941..1943; UNKNOWN 5883 0x1944, // 1944..194F; LIMBU 5884 0x1950, // 1950..196D; TAI_LE 5885 0x196E, // 196E..196F; UNKNOWN 5886 0x1970, // 1970..1974; TAI_LE 5887 0x1975, // 1975..197F; UNKNOWN 5888 0x1980, // 1980..19AB; NEW_TAI_LUE 5889 0x19AC, // 19AC..19AF; UNKNOWN 5890 0x19B0, // 19B0..19C9; NEW_TAI_LUE 5891 0x19CA, // 19CA..19CF; UNKNOWN 5892 0x19D0, // 19D0..19DA; NEW_TAI_LUE 5893 0x19DB, // 19DB..19DD; UNKNOWN 5894 0x19DE, // 19DE..19DF; NEW_TAI_LUE 5895 0x19E0, // 19E0..19FF; KHMER 5896 0x1A00, // 1A00..1A1B; BUGINESE 5897 0x1A1C, // 1A1C..1A1D; UNKNOWN 5898 0x1A1E, // 1A1E..1A1F; BUGINESE 5899 0x1A20, // 1A20..1A5E; TAI_THAM 5900 0x1A5F, // 1A5F ; UNKNOWN 5901 0x1A60, // 1A60..1A7C; TAI_THAM 
5902 0x1A7D, // 1A7D..1A7E; UNKNOWN 5903 0x1A7F, // 1A7F..1A89; TAI_THAM 5904 0x1A8A, // 1A8A..1A8F; UNKNOWN 5905 0x1A90, // 1A90..1A99; TAI_THAM 5906 0x1A9A, // 1A9A..1A9F; UNKNOWN 5907 0x1AA0, // 1AA0..1AAD; TAI_THAM 5908 0x1AAE, // 1AAE..1AAF; UNKNOWN 5909 0x1AB0, // 1AB0..1ACE; INHERITED 5910 0x1ACF, // 1ACF..1AFF; UNKNOWN 5911 0x1B00, // 1B00..1B4C; BALINESE 5912 0x1B4D, // 1B4D..1B4F; UNKNOWN 5913 0x1B50, // 1B50..1B7E; BALINESE 5914 0x1B7F, // 1B7F ; UNKNOWN 5915 0x1B80, // 1B80..1BBF; SUNDANESE 5916 0x1BC0, // 1BC0..1BF3; BATAK 5917 0x1BF4, // 1BF4..1BFB; UNKNOWN 5918 0x1BFC, // 1BFC..1BFF; BATAK 5919 0x1C00, // 1C00..1C37; LEPCHA 5920 0x1C38, // 1C38..1C3A; UNKNOWN 5921 0x1C3B, // 1C3B..1C49; LEPCHA 5922 0x1C4A, // 1C4A..1C4C; UNKNOWN 5923 0x1C4D, // 1C4D..1C4F; LEPCHA 5924 0x1C50, // 1C50..1C7F; OL_CHIKI 5925 0x1C80, // 1C80..1C88; CYRILLIC 5926 0x1C89, // 1C89..1C8F; UNKNOWN 5927 0x1C90, // 1C90..1CBA; GEORGIAN 5928 0x1CBB, // 1CBB..1CBC; UNKNOWN 5929 0x1CBD, // 1CBD..1CBF; GEORGIAN 5930 0x1CC0, // 1CC0..1CC7; SUNDANESE 5931 0x1CC8, // 1CC8..1CCF; UNKNOWN 5932 0x1CD0, // 1CD0..1CD2; INHERITED 5933 0x1CD3, // 1CD3 ; COMMON 5934 0x1CD4, // 1CD4..1CE0; INHERITED 5935 0x1CE1, // 1CE1 ; COMMON 5936 0x1CE2, // 1CE2..1CE8; INHERITED 5937 0x1CE9, // 1CE9..1CEC; COMMON 5938 0x1CED, // 1CED ; INHERITED 5939 0x1CEE, // 1CEE..1CF3; COMMON 5940 0x1CF4, // 1CF4 ; INHERITED 5941 0x1CF5, // 1CF5..1CF7; COMMON 5942 0x1CF8, // 1CF8..1CF9; INHERITED 5943 0x1CFA, // 1CFA ; COMMON 5944 0x1CFB, // 1CFB..1CFF; UNKNOWN 5945 0x1D00, // 1D00..1D25; LATIN 5946 0x1D26, // 1D26..1D2A; GREEK 5947 0x1D2B, // 1D2B ; CYRILLIC 5948 0x1D2C, // 1D2C..1D5C; LATIN 5949 0x1D5D, // 1D5D..1D61; GREEK 5950 0x1D62, // 1D62..1D65; LATIN 5951 0x1D66, // 1D66..1D6A; GREEK 5952 0x1D6B, // 1D6B..1D77; LATIN 5953 0x1D78, // 1D78 ; CYRILLIC 5954 0x1D79, // 1D79..1DBE; LATIN 5955 0x1DBF, // 1DBF ; GREEK 5956 0x1DC0, // 1DC0..1DFF; INHERITED 5957 0x1E00, // 1E00..1EFF; LATIN 5958 0x1F00, // 1F00..1F15; 
GREEK 5959 0x1F16, // 1F16..1F17; UNKNOWN 5960 0x1F18, // 1F18..1F1D; GREEK 5961 0x1F1E, // 1F1E..1F1F; UNKNOWN 5962 0x1F20, // 1F20..1F45; GREEK 5963 0x1F46, // 1F46..1F47; UNKNOWN 5964 0x1F48, // 1F48..1F4D; GREEK 5965 0x1F4E, // 1F4E..1F4F; UNKNOWN 5966 0x1F50, // 1F50..1F57; GREEK 5967 0x1F58, // 1F58 ; UNKNOWN 5968 0x1F59, // 1F59 ; GREEK 5969 0x1F5A, // 1F5A ; UNKNOWN 5970 0x1F5B, // 1F5B ; GREEK 5971 0x1F5C, // 1F5C ; UNKNOWN 5972 0x1F5D, // 1F5D ; GREEK 5973 0x1F5E, // 1F5E ; UNKNOWN 5974 0x1F5F, // 1F5F..1F7D; GREEK 5975 0x1F7E, // 1F7E..1F7F; UNKNOWN 5976 0x1F80, // 1F80..1FB4; GREEK 5977 0x1FB5, // 1FB5 ; UNKNOWN 5978 0x1FB6, // 1FB6..1FC4; GREEK 5979 0x1FC5, // 1FC5 ; UNKNOWN 5980 0x1FC6, // 1FC6..1FD3; GREEK 5981 0x1FD4, // 1FD4..1FD5; UNKNOWN 5982 0x1FD6, // 1FD6..1FDB; GREEK 5983 0x1FDC, // 1FDC ; UNKNOWN 5984 0x1FDD, // 1FDD..1FEF; GREEK 5985 0x1FF0, // 1FF0..1FF1; UNKNOWN 5986 0x1FF2, // 1FF2..1FF4; GREEK 5987 0x1FF5, // 1FF5 ; UNKNOWN 5988 0x1FF6, // 1FF6..1FFE; GREEK 5989 0x1FFF, // 1FFF ; UNKNOWN 5990 0x2000, // 2000..200B; COMMON 5991 0x200C, // 200C..200D; INHERITED 5992 0x200E, // 200E..2064; COMMON 5993 0x2065, // 2065 ; UNKNOWN 5994 0x2066, // 2066..2070; COMMON 5995 0x2071, // 2071 ; LATIN 5996 0x2072, // 2072..2073; UNKNOWN 5997 0x2074, // 2074..207E; COMMON 5998 0x207F, // 207F ; LATIN 5999 0x2080, // 2080..208E; COMMON 6000 0x208F, // 208F ; UNKNOWN 6001 0x2090, // 2090..209C; LATIN 6002 0x209D, // 209D..209F; UNKNOWN 6003 0x20A0, // 20A0..20C0; COMMON 6004 0x20C1, // 20C1..20CF; UNKNOWN 6005 0x20D0, // 20D0..20F0; INHERITED 6006 0x20F1, // 20F1..20FF; UNKNOWN 6007 0x2100, // 2100..2125; COMMON 6008 0x2126, // 2126 ; GREEK 6009 0x2127, // 2127..2129; COMMON 6010 0x212A, // 212A..212B; LATIN 6011 0x212C, // 212C..2131; COMMON 6012 0x2132, // 2132 ; LATIN 6013 0x2133, // 2133..214D; COMMON 6014 0x214E, // 214E ; LATIN 6015 0x214F, // 214F..215F; COMMON 6016 0x2160, // 2160..2188; LATIN 6017 0x2189, // 2189..218B; COMMON 6018 0x218C, // 
218C..218F; UNKNOWN 6019 0x2190, // 2190..2426; COMMON 6020 0x2427, // 2427..243F; UNKNOWN 6021 0x2440, // 2440..244A; COMMON 6022 0x244B, // 244B..245F; UNKNOWN 6023 0x2460, // 2460..27FF; COMMON 6024 0x2800, // 2800..28FF; BRAILLE 6025 0x2900, // 2900..2B73; COMMON 6026 0x2B74, // 2B74..2B75; UNKNOWN 6027 0x2B76, // 2B76..2B95; COMMON 6028 0x2B96, // 2B96 ; UNKNOWN 6029 0x2B97, // 2B97..2BFF; COMMON 6030 0x2C00, // 2C00..2C5F; GLAGOLITIC 6031 0x2C60, // 2C60..2C7F; LATIN 6032 0x2C80, // 2C80..2CF3; COPTIC 6033 0x2CF4, // 2CF4..2CF8; UNKNOWN 6034 0x2CF9, // 2CF9..2CFF; COPTIC 6035 0x2D00, // 2D00..2D25; GEORGIAN 6036 0x2D26, // 2D26 ; UNKNOWN 6037 0x2D27, // 2D27 ; GEORGIAN 6038 0x2D28, // 2D28..2D2C; UNKNOWN 6039 0x2D2D, // 2D2D ; GEORGIAN 6040 0x2D2E, // 2D2E..2D2F; UNKNOWN 6041 0x2D30, // 2D30..2D67; TIFINAGH 6042 0x2D68, // 2D68..2D6E; UNKNOWN 6043 0x2D6F, // 2D6F..2D70; TIFINAGH 6044 0x2D71, // 2D71..2D7E; UNKNOWN 6045 0x2D7F, // 2D7F ; TIFINAGH 6046 0x2D80, // 2D80..2D96; ETHIOPIC 6047 0x2D97, // 2D97..2D9F; UNKNOWN 6048 0x2DA0, // 2DA0..2DA6; ETHIOPIC 6049 0x2DA7, // 2DA7 ; UNKNOWN 6050 0x2DA8, // 2DA8..2DAE; ETHIOPIC 6051 0x2DAF, // 2DAF ; UNKNOWN 6052 0x2DB0, // 2DB0..2DB6; ETHIOPIC 6053 0x2DB7, // 2DB7 ; UNKNOWN 6054 0x2DB8, // 2DB8..2DBE; ETHIOPIC 6055 0x2DBF, // 2DBF ; UNKNOWN 6056 0x2DC0, // 2DC0..2DC6; ETHIOPIC 6057 0x2DC7, // 2DC7 ; UNKNOWN 6058 0x2DC8, // 2DC8..2DCE; ETHIOPIC 6059 0x2DCF, // 2DCF ; UNKNOWN 6060 0x2DD0, // 2DD0..2DD6; ETHIOPIC 6061 0x2DD7, // 2DD7 ; UNKNOWN 6062 0x2DD8, // 2DD8..2DDE; ETHIOPIC 6063 0x2DDF, // 2DDF ; UNKNOWN 6064 0x2DE0, // 2DE0..2DFF; CYRILLIC 6065 0x2E00, // 2E00..2E5D; COMMON 6066 0x2E5E, // 2E5E..2E7F; UNKNOWN 6067 0x2E80, // 2E80..2E99; HAN 6068 0x2E9A, // 2E9A ; UNKNOWN 6069 0x2E9B, // 2E9B..2EF3; HAN 6070 0x2EF4, // 2EF4..2EFF; UNKNOWN 6071 0x2F00, // 2F00..2FD5; HAN 6072 0x2FD6, // 2FD6..2FEF; UNKNOWN 6073 0x2FF0, // 2FF0..3004; COMMON 6074 0x3005, // 3005 ; HAN 6075 0x3006, // 3006 ; COMMON 6076 0x3007, // 
3007 ; HAN 6077 0x3008, // 3008..3020; COMMON 6078 0x3021, // 3021..3029; HAN 6079 0x302A, // 302A..302D; INHERITED 6080 0x302E, // 302E..302F; HANGUL 6081 0x3030, // 3030..3037; COMMON 6082 0x3038, // 3038..303B; HAN 6083 0x303C, // 303C..303F; COMMON 6084 0x3040, // 3040 ; UNKNOWN 6085 0x3041, // 3041..3096; HIRAGANA 6086 0x3097, // 3097..3098; UNKNOWN 6087 0x3099, // 3099..309A; INHERITED 6088 0x309B, // 309B..309C; COMMON 6089 0x309D, // 309D..309F; HIRAGANA 6090 0x30A0, // 30A0 ; COMMON 6091 0x30A1, // 30A1..30FA; KATAKANA 6092 0x30FB, // 30FB..30FC; COMMON 6093 0x30FD, // 30FD..30FF; KATAKANA 6094 0x3100, // 3100..3104; UNKNOWN 6095 0x3105, // 3105..312F; BOPOMOFO 6096 0x3130, // 3130 ; UNKNOWN 6097 0x3131, // 3131..318E; HANGUL 6098 0x318F, // 318F ; UNKNOWN 6099 0x3190, // 3190..319F; COMMON 6100 0x31A0, // 31A0..31BF; BOPOMOFO 6101 0x31C0, // 31C0..31E3; COMMON 6102 0x31E4, // 31E4..31EE; UNKNOWN 6103 0x31EF, // 31EF ; COMMON 6104 0x31F0, // 31F0..31FF; KATAKANA 6105 0x3200, // 3200..321E; HANGUL 6106 0x321F, // 321F ; UNKNOWN 6107 0x3220, // 3220..325F; COMMON 6108 0x3260, // 3260..327E; HANGUL 6109 0x327F, // 327F..32CF; COMMON 6110 0x32D0, // 32D0..32FE; KATAKANA 6111 0x32FF, // 32FF ; COMMON 6112 0x3300, // 3300..3357; KATAKANA 6113 0x3358, // 3358..33FF; COMMON 6114 0x3400, // 3400..4DBF; HAN 6115 0x4DC0, // 4DC0..4DFF; COMMON 6116 0x4E00, // 4E00..9FFF; HAN 6117 0xA000, // A000..A48C; YI 6118 0xA48D, // A48D..A48F; UNKNOWN 6119 0xA490, // A490..A4C6; YI 6120 0xA4C7, // A4C7..A4CF; UNKNOWN 6121 0xA4D0, // A4D0..A4FF; LISU 6122 0xA500, // A500..A62B; VAI 6123 0xA62C, // A62C..A63F; UNKNOWN 6124 0xA640, // A640..A69F; CYRILLIC 6125 0xA6A0, // A6A0..A6F7; BAMUM 6126 0xA6F8, // A6F8..A6FF; UNKNOWN 6127 0xA700, // A700..A721; COMMON 6128 0xA722, // A722..A787; LATIN 6129 0xA788, // A788..A78A; COMMON 6130 0xA78B, // A78B..A7CA; LATIN 6131 0xA7CB, // A7CB..A7CF; UNKNOWN 6132 0xA7D0, // A7D0..A7D1; LATIN 6133 0xA7D2, // A7D2 ; UNKNOWN 6134 0xA7D3, // A7D3 ; 
LATIN 6135 0xA7D4, // A7D4 ; UNKNOWN 6136 0xA7D5, // A7D5..A7D9; LATIN 6137 0xA7DA, // A7DA..A7F1; UNKNOWN 6138 0xA7F2, // A7F2..A7FF; LATIN 6139 0xA800, // A800..A82C; SYLOTI_NAGRI 6140 0xA82D, // A82D..A82F; UNKNOWN 6141 0xA830, // A830..A839; COMMON 6142 0xA83A, // A83A..A83F; UNKNOWN 6143 0xA840, // A840..A877; PHAGS_PA 6144 0xA878, // A878..A87F; UNKNOWN 6145 0xA880, // A880..A8C5; SAURASHTRA 6146 0xA8C6, // A8C6..A8CD; UNKNOWN 6147 0xA8CE, // A8CE..A8D9; SAURASHTRA 6148 0xA8DA, // A8DA..A8DF; UNKNOWN 6149 0xA8E0, // A8E0..A8FF; DEVANAGARI 6150 0xA900, // A900..A92D; KAYAH_LI 6151 0xA92E, // A92E ; COMMON 6152 0xA92F, // A92F ; KAYAH_LI 6153 0xA930, // A930..A953; REJANG 6154 0xA954, // A954..A95E; UNKNOWN 6155 0xA95F, // A95F ; REJANG 6156 0xA960, // A960..A97C; HANGUL 6157 0xA97D, // A97D..A97F; UNKNOWN 6158 0xA980, // A980..A9CD; JAVANESE 6159 0xA9CE, // A9CE ; UNKNOWN 6160 0xA9CF, // A9CF ; COMMON 6161 0xA9D0, // A9D0..A9D9; JAVANESE 6162 0xA9DA, // A9DA..A9DD; UNKNOWN 6163 0xA9DE, // A9DE..A9DF; JAVANESE 6164 0xA9E0, // A9E0..A9FE; MYANMAR 6165 0xA9FF, // A9FF ; UNKNOWN 6166 0xAA00, // AA00..AA36; CHAM 6167 0xAA37, // AA37..AA3F; UNKNOWN 6168 0xAA40, // AA40..AA4D; CHAM 6169 0xAA4E, // AA4E..AA4F; UNKNOWN 6170 0xAA50, // AA50..AA59; CHAM 6171 0xAA5A, // AA5A..AA5B; UNKNOWN 6172 0xAA5C, // AA5C..AA5F; CHAM 6173 0xAA60, // AA60..AA7F; MYANMAR 6174 0xAA80, // AA80..AAC2; TAI_VIET 6175 0xAAC3, // AAC3..AADA; UNKNOWN 6176 0xAADB, // AADB..AADF; TAI_VIET 6177 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK 6178 0xAAF7, // AAF7..AB00; UNKNOWN 6179 0xAB01, // AB01..AB06; ETHIOPIC 6180 0xAB07, // AB07..AB08; UNKNOWN 6181 0xAB09, // AB09..AB0E; ETHIOPIC 6182 0xAB0F, // AB0F..AB10; UNKNOWN 6183 0xAB11, // AB11..AB16; ETHIOPIC 6184 0xAB17, // AB17..AB1F; UNKNOWN 6185 0xAB20, // AB20..AB26; ETHIOPIC 6186 0xAB27, // AB27 ; UNKNOWN 6187 0xAB28, // AB28..AB2E; ETHIOPIC 6188 0xAB2F, // AB2F ; UNKNOWN 6189 0xAB30, // AB30..AB5A; LATIN 6190 0xAB5B, // AB5B ; COMMON 6191 0xAB5C, // 
AB5C..AB64; LATIN 6192 0xAB65, // AB65 ; GREEK 6193 0xAB66, // AB66..AB69; LATIN 6194 0xAB6A, // AB6A..AB6B; COMMON 6195 0xAB6C, // AB6C..AB6F; UNKNOWN 6196 0xAB70, // AB70..ABBF; CHEROKEE 6197 0xABC0, // ABC0..ABED; MEETEI_MAYEK 6198 0xABEE, // ABEE..ABEF; UNKNOWN 6199 0xABF0, // ABF0..ABF9; MEETEI_MAYEK 6200 0xABFA, // ABFA..ABFF; UNKNOWN 6201 0xAC00, // AC00..D7A3; HANGUL 6202 0xD7A4, // D7A4..D7AF; UNKNOWN 6203 0xD7B0, // D7B0..D7C6; HANGUL 6204 0xD7C7, // D7C7..D7CA; UNKNOWN 6205 0xD7CB, // D7CB..D7FB; HANGUL 6206 0xD7FC, // D7FC..F8FF; UNKNOWN 6207 0xF900, // F900..FA6D; HAN 6208 0xFA6E, // FA6E..FA6F; UNKNOWN 6209 0xFA70, // FA70..FAD9; HAN 6210 0xFADA, // FADA..FAFF; UNKNOWN 6211 0xFB00, // FB00..FB06; LATIN 6212 0xFB07, // FB07..FB12; UNKNOWN 6213 0xFB13, // FB13..FB17; ARMENIAN 6214 0xFB18, // FB18..FB1C; UNKNOWN 6215 0xFB1D, // FB1D..FB36; HEBREW 6216 0xFB37, // FB37 ; UNKNOWN 6217 0xFB38, // FB38..FB3C; HEBREW 6218 0xFB3D, // FB3D ; UNKNOWN 6219 0xFB3E, // FB3E ; HEBREW 6220 0xFB3F, // FB3F ; UNKNOWN 6221 0xFB40, // FB40..FB41; HEBREW 6222 0xFB42, // FB42 ; UNKNOWN 6223 0xFB43, // FB43..FB44; HEBREW 6224 0xFB45, // FB45 ; UNKNOWN 6225 0xFB46, // FB46..FB4F; HEBREW 6226 0xFB50, // FB50..FBC2; ARABIC 6227 0xFBC3, // FBC3..FBD2; UNKNOWN 6228 0xFBD3, // FBD3..FD3D; ARABIC 6229 0xFD3E, // FD3E..FD3F; COMMON 6230 0xFD40, // FD40..FD8F; ARABIC 6231 0xFD90, // FD90..FD91; UNKNOWN 6232 0xFD92, // FD92..FDC7; ARABIC 6233 0xFDC8, // FDC8..FDCE; UNKNOWN 6234 0xFDCF, // FDCF ; ARABIC 6235 0xFDD0, // FDD0..FDEF; UNKNOWN 6236 0xFDF0, // FDF0..FDFF; ARABIC 6237 0xFE00, // FE00..FE0F; INHERITED 6238 0xFE10, // FE10..FE19; COMMON 6239 0xFE1A, // FE1A..FE1F; UNKNOWN 6240 0xFE20, // FE20..FE2D; INHERITED 6241 0xFE2E, // FE2E..FE2F; CYRILLIC 6242 0xFE30, // FE30..FE52; COMMON 6243 0xFE53, // FE53 ; UNKNOWN 6244 0xFE54, // FE54..FE66; COMMON 6245 0xFE67, // FE67 ; UNKNOWN 6246 0xFE68, // FE68..FE6B; COMMON 6247 0xFE6C, // FE6C..FE6F; UNKNOWN 6248 0xFE70, // FE70..FE74; 
ARABIC 6249 0xFE75, // FE75 ; UNKNOWN 6250 0xFE76, // FE76..FEFC; ARABIC 6251 0xFEFD, // FEFD..FEFE; UNKNOWN 6252 0xFEFF, // FEFF ; COMMON 6253 0xFF00, // FF00 ; UNKNOWN 6254 0xFF01, // FF01..FF20; COMMON 6255 0xFF21, // FF21..FF3A; LATIN 6256 0xFF3B, // FF3B..FF40; COMMON 6257 0xFF41, // FF41..FF5A; LATIN 6258 0xFF5B, // FF5B..FF65; COMMON 6259 0xFF66, // FF66..FF6F; KATAKANA 6260 0xFF70, // FF70 ; COMMON 6261 0xFF71, // FF71..FF9D; KATAKANA 6262 0xFF9E, // FF9E..FF9F; COMMON 6263 0xFFA0, // FFA0..FFBE; HANGUL 6264 0xFFBF, // FFBF..FFC1; UNKNOWN 6265 0xFFC2, // FFC2..FFC7; HANGUL 6266 0xFFC8, // FFC8..FFC9; UNKNOWN 6267 0xFFCA, // FFCA..FFCF; HANGUL 6268 0xFFD0, // FFD0..FFD1; UNKNOWN 6269 0xFFD2, // FFD2..FFD7; HANGUL 6270 0xFFD8, // FFD8..FFD9; UNKNOWN 6271 0xFFDA, // FFDA..FFDC; HANGUL 6272 0xFFDD, // FFDD..FFDF; UNKNOWN 6273 0xFFE0, // FFE0..FFE6; COMMON 6274 0xFFE7, // FFE7 ; UNKNOWN 6275 0xFFE8, // FFE8..FFEE; COMMON 6276 0xFFEF, // FFEF..FFF8; UNKNOWN 6277 0xFFF9, // FFF9..FFFD; COMMON 6278 0xFFFE, // FFFE..FFFF; UNKNOWN 6279 0x10000, // 10000..1000B; LINEAR_B 6280 0x1000C, // 1000C ; UNKNOWN 6281 0x1000D, // 1000D..10026; LINEAR_B 6282 0x10027, // 10027 ; UNKNOWN 6283 0x10028, // 10028..1003A; LINEAR_B 6284 0x1003B, // 1003B ; UNKNOWN 6285 0x1003C, // 1003C..1003D; LINEAR_B 6286 0x1003E, // 1003E ; UNKNOWN 6287 0x1003F, // 1003F..1004D; LINEAR_B 6288 0x1004E, // 1004E..1004F; UNKNOWN 6289 0x10050, // 10050..1005D; LINEAR_B 6290 0x1005E, // 1005E..1007F; UNKNOWN 6291 0x10080, // 10080..100FA; LINEAR_B 6292 0x100FB, // 100FB..100FF; UNKNOWN 6293 0x10100, // 10100..10102; COMMON 6294 0x10103, // 10103..10106; UNKNOWN 6295 0x10107, // 10107..10133; COMMON 6296 0x10134, // 10134..10136; UNKNOWN 6297 0x10137, // 10137..1013F; COMMON 6298 0x10140, // 10140..1018E; GREEK 6299 0x1018F, // 1018F ; UNKNOWN 6300 0x10190, // 10190..1019C; COMMON 6301 0x1019D, // 1019D..1019F; UNKNOWN 6302 0x101A0, // 101A0 ; GREEK 6303 0x101A1, // 101A1..101CF; UNKNOWN 6304 0x101D0, // 
101D0..101FC; COMMON 6305 0x101FD, // 101FD ; INHERITED 6306 0x101FE, // 101FE..1027F; UNKNOWN 6307 0x10280, // 10280..1029C; LYCIAN 6308 0x1029D, // 1029D..1029F; UNKNOWN 6309 0x102A0, // 102A0..102D0; CARIAN 6310 0x102D1, // 102D1..102DF; UNKNOWN 6311 0x102E0, // 102E0 ; INHERITED 6312 0x102E1, // 102E1..102FB; COMMON 6313 0x102FC, // 102FC..102FF; UNKNOWN 6314 0x10300, // 10300..10323; OLD_ITALIC 6315 0x10324, // 10324..1032C; UNKNOWN 6316 0x1032D, // 1032D..1032F; OLD_ITALIC 6317 0x10330, // 10330..1034A; GOTHIC 6318 0x1034B, // 1034B..1034F; UNKNOWN 6319 0x10350, // 10350..1037A; OLD_PERMIC 6320 0x1037B, // 1037B..1037F; UNKNOWN 6321 0x10380, // 10380..1039D; UGARITIC 6322 0x1039E, // 1039E ; UNKNOWN 6323 0x1039F, // 1039F ; UGARITIC 6324 0x103A0, // 103A0..103C3; OLD_PERSIAN 6325 0x103C4, // 103C4..103C7; UNKNOWN 6326 0x103C8, // 103C8..103D5; OLD_PERSIAN 6327 0x103D6, // 103D6..103FF; UNKNOWN 6328 0x10400, // 10400..1044F; DESERET 6329 0x10450, // 10450..1047F; SHAVIAN 6330 0x10480, // 10480..1049D; OSMANYA 6331 0x1049E, // 1049E..1049F; UNKNOWN 6332 0x104A0, // 104A0..104A9; OSMANYA 6333 0x104AA, // 104AA..104AF; UNKNOWN 6334 0x104B0, // 104B0..104D3; OSAGE 6335 0x104D4, // 104D4..104D7; UNKNOWN 6336 0x104D8, // 104D8..104FB; OSAGE 6337 0x104FC, // 104FC..104FF; UNKNOWN 6338 0x10500, // 10500..10527; ELBASAN 6339 0x10528, // 10528..1052F; UNKNOWN 6340 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN 6341 0x10564, // 10564..1056E; UNKNOWN 6342 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN 6343 0x10570, // 10570..1057A; VITHKUQI 6344 0x1057B, // 1057B ; UNKNOWN 6345 0x1057C, // 1057C..1058A; VITHKUQI 6346 0x1058B, // 1058B ; UNKNOWN 6347 0x1058C, // 1058C..10592; VITHKUQI 6348 0x10593, // 10593 ; UNKNOWN 6349 0x10594, // 10594..10595; VITHKUQI 6350 0x10596, // 10596 ; UNKNOWN 6351 0x10597, // 10597..105A1; VITHKUQI 6352 0x105A2, // 105A2 ; UNKNOWN 6353 0x105A3, // 105A3..105B1; VITHKUQI 6354 0x105B2, // 105B2 ; UNKNOWN 6355 0x105B3, // 105B3..105B9; VITHKUQI 6356 
0x105BA, // 105BA ; UNKNOWN 6357 0x105BB, // 105BB..105BC; VITHKUQI 6358 0x105BD, // 105BD..105FF; UNKNOWN 6359 0x10600, // 10600..10736; LINEAR_A 6360 0x10737, // 10737..1073F; UNKNOWN 6361 0x10740, // 10740..10755; LINEAR_A 6362 0x10756, // 10756..1075F; UNKNOWN 6363 0x10760, // 10760..10767; LINEAR_A 6364 0x10768, // 10768..1077F; UNKNOWN 6365 0x10780, // 10780..10785; LATIN 6366 0x10786, // 10786 ; UNKNOWN 6367 0x10787, // 10787..107B0; LATIN 6368 0x107B1, // 107B1 ; UNKNOWN 6369 0x107B2, // 107B2..107BA; LATIN 6370 0x107BB, // 107BB..107FF; UNKNOWN 6371 0x10800, // 10800..10805; CYPRIOT 6372 0x10806, // 10806..10807; UNKNOWN 6373 0x10808, // 10808 ; CYPRIOT 6374 0x10809, // 10809 ; UNKNOWN 6375 0x1080A, // 1080A..10835; CYPRIOT 6376 0x10836, // 10836 ; UNKNOWN 6377 0x10837, // 10837..10838; CYPRIOT 6378 0x10839, // 10839..1083B; UNKNOWN 6379 0x1083C, // 1083C ; CYPRIOT 6380 0x1083D, // 1083D..1083E; UNKNOWN 6381 0x1083F, // 1083F ; CYPRIOT 6382 0x10840, // 10840..10855; IMPERIAL_ARAMAIC 6383 0x10856, // 10856 ; UNKNOWN 6384 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC 6385 0x10860, // 10860..1087F; PALMYRENE 6386 0x10880, // 10880..1089E; NABATAEAN 6387 0x1089F, // 1089F..108A6; UNKNOWN 6388 0x108A7, // 108A7..108AF; NABATAEAN 6389 0x108B0, // 108B0..108DF; UNKNOWN 6390 0x108E0, // 108E0..108F2; HATRAN 6391 0x108F3, // 108F3 ; UNKNOWN 6392 0x108F4, // 108F4..108F5; HATRAN 6393 0x108F6, // 108F6..108FA; UNKNOWN 6394 0x108FB, // 108FB..108FF; HATRAN 6395 0x10900, // 10900..1091B; PHOENICIAN 6396 0x1091C, // 1091C..1091E; UNKNOWN 6397 0x1091F, // 1091F ; PHOENICIAN 6398 0x10920, // 10920..10939; LYDIAN 6399 0x1093A, // 1093A..1093E; UNKNOWN 6400 0x1093F, // 1093F ; LYDIAN 6401 0x10940, // 10940..1097F; UNKNOWN 6402 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 6403 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE 6404 0x109B8, // 109B8..109BB; UNKNOWN 6405 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE 6406 0x109D0, // 109D0..109D1; UNKNOWN 6407 0x109D2, // 109D2..109FF; 
MEROITIC_CURSIVE 6408 0x10A00, // 10A00..10A03; KHAROSHTHI 6409 0x10A04, // 10A04 ; UNKNOWN 6410 0x10A05, // 10A05..10A06; KHAROSHTHI 6411 0x10A07, // 10A07..10A0B; UNKNOWN 6412 0x10A0C, // 10A0C..10A13; KHAROSHTHI 6413 0x10A14, // 10A14 ; UNKNOWN 6414 0x10A15, // 10A15..10A17; KHAROSHTHI 6415 0x10A18, // 10A18 ; UNKNOWN 6416 0x10A19, // 10A19..10A35; KHAROSHTHI 6417 0x10A36, // 10A36..10A37; UNKNOWN 6418 0x10A38, // 10A38..10A3A; KHAROSHTHI 6419 0x10A3B, // 10A3B..10A3E; UNKNOWN 6420 0x10A3F, // 10A3F..10A48; KHAROSHTHI 6421 0x10A49, // 10A49..10A4F; UNKNOWN 6422 0x10A50, // 10A50..10A58; KHAROSHTHI 6423 0x10A59, // 10A59..10A5F; UNKNOWN 6424 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN 6425 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN 6426 0x10AA0, // 10AA0..10ABF; UNKNOWN 6427 0x10AC0, // 10AC0..10AE6; MANICHAEAN 6428 0x10AE7, // 10AE7..10AEA; UNKNOWN 6429 0x10AEB, // 10AEB..10AF6; MANICHAEAN 6430 0x10AF7, // 10AF7..10AFF; UNKNOWN 6431 0x10B00, // 10B00..10B35; AVESTAN 6432 0x10B36, // 10B36..10B38; UNKNOWN 6433 0x10B39, // 10B39..10B3F; AVESTAN 6434 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN 6435 0x10B56, // 10B56..10B57; UNKNOWN 6436 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN 6437 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI 6438 0x10B73, // 10B73..10B77; UNKNOWN 6439 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI 6440 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI 6441 0x10B92, // 10B92..10B98; UNKNOWN 6442 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI 6443 0x10B9D, // 10B9D..10BA8; UNKNOWN 6444 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI 6445 0x10BB0, // 10BB0..10BFF; UNKNOWN 6446 0x10C00, // 10C00..10C48; OLD_TURKIC 6447 0x10C49, // 10C49..10C7F; UNKNOWN 6448 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN 6449 0x10CB3, // 10CB3..10CBF; UNKNOWN 6450 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN 6451 0x10CF3, // 10CF3..10CF9; UNKNOWN 6452 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN 6453 0x10D00, // 10D00..10D27; HANIFI_ROHINGYA 6454 0x10D28, // 10D28..10D2F; UNKNOWN 6455 
0x10D30, // 10D30..10D39; HANIFI_ROHINGYA 6456 0x10D3A, // 10D3A..10E5F; UNKNOWN 6457 0x10E60, // 10E60..10E7E; ARABIC 6458 0x10E7F, // 10E7F ; UNKNOWN 6459 0x10E80, // 10E80..10EA9; YEZIDI 6460 0x10EAA, // 10EAA ; UNKNOWN 6461 0x10EAB, // 10EAB..10EAD; YEZIDI 6462 0x10EAE, // 10EAE..10EAF; UNKNOWN 6463 0x10EB0, // 10EB0..10EB1; YEZIDI 6464 0x10EB2, // 10EB2..10EFC; UNKNOWN 6465 0x10EFD, // 10EFD..10EFF; ARABIC 6466 0x10F00, // 10F00..10F27; OLD_SOGDIAN 6467 0x10F28, // 10F28..10F2F; UNKNOWN 6468 0x10F30, // 10F30..10F59; SOGDIAN 6469 0x10F5A, // 10F5A..10F6F; UNKNOWN 6470 0x10F70, // 10F70..10F89; OLD_UYGHUR 6471 0x10F8A, // 10F8A..10FAF; UNKNOWN 6472 0x10FB0, // 10FB0..10FCB; CHORASMIAN 6473 0x10FCC, // 10FCC..10FDF; UNKNOWN 6474 0x10FE0, // 10FE0..10FF6; ELYMAIC 6475 0x10FF7, // 10FF7..10FFF; UNKNOWN 6476 0x11000, // 11000..1104D; BRAHMI 6477 0x1104E, // 1104E..11051; UNKNOWN 6478 0x11052, // 11052..11075; BRAHMI 6479 0x11076, // 11076..1107E; UNKNOWN 6480 0x1107F, // 1107F ; BRAHMI 6481 0x11080, // 11080..110C2; KAITHI 6482 0x110C3, // 110C3..110CC; UNKNOWN 6483 0x110CD, // 110CD ; KAITHI 6484 0x110CE, // 110CE..110CF; UNKNOWN 6485 0x110D0, // 110D0..110E8; SORA_SOMPENG 6486 0x110E9, // 110E9..110EF; UNKNOWN 6487 0x110F0, // 110F0..110F9; SORA_SOMPENG 6488 0x110FA, // 110FA..110FF; UNKNOWN 6489 0x11100, // 11100..11134; CHAKMA 6490 0x11135, // 11135 ; UNKNOWN 6491 0x11136, // 11136..11147; CHAKMA 6492 0x11148, // 11148..1114F; UNKNOWN 6493 0x11150, // 11150..11176; MAHAJANI 6494 0x11177, // 11177..1117F; UNKNOWN 6495 0x11180, // 11180..111DF; SHARADA 6496 0x111E0, // 111E0 ; UNKNOWN 6497 0x111E1, // 111E1..111F4; SINHALA 6498 0x111F5, // 111F5..111FF; UNKNOWN 6499 0x11200, // 11200..11211; KHOJKI 6500 0x11212, // 11212 ; UNKNOWN 6501 0x11213, // 11213..11241; KHOJKI 6502 0x11242, // 11242..1127F; UNKNOWN 6503 0x11280, // 11280..11286; MULTANI 6504 0x11287, // 11287 ; UNKNOWN 6505 0x11288, // 11288 ; MULTANI 6506 0x11289, // 11289 ; UNKNOWN 6507 0x1128A, // 
1128A..1128D; MULTANI 6508 0x1128E, // 1128E ; UNKNOWN 6509 0x1128F, // 1128F..1129D; MULTANI 6510 0x1129E, // 1129E ; UNKNOWN 6511 0x1129F, // 1129F..112A9; MULTANI 6512 0x112AA, // 112AA..112AF; UNKNOWN 6513 0x112B0, // 112B0..112EA; KHUDAWADI 6514 0x112EB, // 112EB..112EF; UNKNOWN 6515 0x112F0, // 112F0..112F9; KHUDAWADI 6516 0x112FA, // 112FA..112FF; UNKNOWN 6517 0x11300, // 11300..11303; GRANTHA 6518 0x11304, // 11304 ; UNKNOWN 6519 0x11305, // 11305..1130C; GRANTHA 6520 0x1130D, // 1130D..1130E; UNKNOWN 6521 0x1130F, // 1130F..11310; GRANTHA 6522 0x11311, // 11311..11312; UNKNOWN 6523 0x11313, // 11313..11328; GRANTHA 6524 0x11329, // 11329 ; UNKNOWN 6525 0x1132A, // 1132A..11330; GRANTHA 6526 0x11331, // 11331 ; UNKNOWN 6527 0x11332, // 11332..11333; GRANTHA 6528 0x11334, // 11334 ; UNKNOWN 6529 0x11335, // 11335..11339; GRANTHA 6530 0x1133A, // 1133A ; UNKNOWN 6531 0x1133B, // 1133B ; INHERITED 6532 0x1133C, // 1133C..11344; GRANTHA 6533 0x11345, // 11345..11346; UNKNOWN 6534 0x11347, // 11347..11348; GRANTHA 6535 0x11349, // 11349..1134A; UNKNOWN 6536 0x1134B, // 1134B..1134D; GRANTHA 6537 0x1134E, // 1134E..1134F; UNKNOWN 6538 0x11350, // 11350 ; GRANTHA 6539 0x11351, // 11351..11356; UNKNOWN 6540 0x11357, // 11357 ; GRANTHA 6541 0x11358, // 11358..1135C; UNKNOWN 6542 0x1135D, // 1135D..11363; GRANTHA 6543 0x11364, // 11364..11365; UNKNOWN 6544 0x11366, // 11366..1136C; GRANTHA 6545 0x1136D, // 1136D..1136F; UNKNOWN 6546 0x11370, // 11370..11374; GRANTHA 6547 0x11375, // 11375..113FF; UNKNOWN 6548 0x11400, // 11400..1145B; NEWA 6549 0x1145C, // 1145C ; UNKNOWN 6550 0x1145D, // 1145D..11461; NEWA 6551 0x11462, // 11462..1147F; UNKNOWN 6552 0x11480, // 11480..114C7; TIRHUTA 6553 0x114C8, // 114C8..114CF; UNKNOWN 6554 0x114D0, // 114D0..114D9; TIRHUTA 6555 0x114DA, // 114DA..1157F; UNKNOWN 6556 0x11580, // 11580..115B5; SIDDHAM 6557 0x115B6, // 115B6..115B7; UNKNOWN 6558 0x115B8, // 115B8..115DD; SIDDHAM 6559 0x115DE, // 115DE..115FF; UNKNOWN 6560 0x11600, 
// 11600..11644; MODI 6561 0x11645, // 11645..1164F; UNKNOWN 6562 0x11650, // 11650..11659; MODI 6563 0x1165A, // 1165A..1165F; UNKNOWN 6564 0x11660, // 11660..1166C; MONGOLIAN 6565 0x1166D, // 1166D..1167F; UNKNOWN 6566 0x11680, // 11680..116B9; TAKRI 6567 0x116BA, // 116BA..116BF; UNKNOWN 6568 0x116C0, // 116C0..116C9; TAKRI 6569 0x116CA, // 116CA..116FF; UNKNOWN 6570 0x11700, // 11700..1171A; AHOM 6571 0x1171B, // 1171B..1171C; UNKNOWN 6572 0x1171D, // 1171D..1172B; AHOM 6573 0x1172C, // 1172C..1172F; UNKNOWN 6574 0x11730, // 11730..11746; AHOM 6575 0x11747, // 11747..117FF; UNKNOWN 6576 0x11800, // 11800..1183B; DOGRA 6577 0x1183C, // 1183C..1189F; UNKNOWN 6578 0x118A0, // 118A0..118F2; WARANG_CITI 6579 0x118F3, // 118F3..118FE; UNKNOWN 6580 0x118FF, // 118FF ; WARANG_CITI 6581 0x11900, // 11900..11906; DIVES_AKURU 6582 0x11907, // 11907..11908; UNKNOWN 6583 0x11909, // 11909 ; DIVES_AKURU 6584 0x1190A, // 1190A..1190B; UNKNOWN 6585 0x1190C, // 1190C..11913; DIVES_AKURU 6586 0x11914, // 11914 ; UNKNOWN 6587 0x11915, // 11915..11916; DIVES_AKURU 6588 0x11917, // 11917 ; UNKNOWN 6589 0x11918, // 11918..11935; DIVES_AKURU 6590 0x11936, // 11936 ; UNKNOWN 6591 0x11937, // 11937..11938; DIVES_AKURU 6592 0x11939, // 11939..1193A; UNKNOWN 6593 0x1193B, // 1193B..11946; DIVES_AKURU 6594 0x11947, // 11947..1194F; UNKNOWN 6595 0x11950, // 11950..11959; DIVES_AKURU 6596 0x1195A, // 1195A..1199F; UNKNOWN 6597 0x119A0, // 119A0..119A7; NANDINAGARI 6598 0x119A8, // 119A8..119A9; UNKNOWN 6599 0x119AA, // 119AA..119D7; NANDINAGARI 6600 0x119D8, // 119D8..119D9; UNKNOWN 6601 0x119DA, // 119DA..119E4; NANDINAGARI 6602 0x119E5, // 119E5..119FF; UNKNOWN 6603 0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE 6604 0x11A48, // 11A48..11A4F; UNKNOWN 6605 0x11A50, // 11A50..11AA2; SOYOMBO 6606 0x11AA3, // 11AA3..11AAF; UNKNOWN 6607 0x11AB0, // 11AB0..11ABF; CANADIAN_ABORIGINAL 6608 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU 6609 0x11AF9, // 11AF9..11AFF; UNKNOWN 6610 0x11B00, // 11B00..11B09; 
DEVANAGARI 6611 0x11B0A, // 11B0A..11BFF; UNKNOWN 6612 0x11C00, // 11C00..11C08; BHAIKSUKI 6613 0x11C09, // 11C09 ; UNKNOWN 6614 0x11C0A, // 11C0A..11C36; BHAIKSUKI 6615 0x11C37, // 11C37 ; UNKNOWN 6616 0x11C38, // 11C38..11C45; BHAIKSUKI 6617 0x11C46, // 11C46..11C4F; UNKNOWN 6618 0x11C50, // 11C50..11C6C; BHAIKSUKI 6619 0x11C6D, // 11C6D..11C6F; UNKNOWN 6620 0x11C70, // 11C70..11C8F; MARCHEN 6621 0x11C90, // 11C90..11C91; UNKNOWN 6622 0x11C92, // 11C92..11CA7; MARCHEN 6623 0x11CA8, // 11CA8 ; UNKNOWN 6624 0x11CA9, // 11CA9..11CB6; MARCHEN 6625 0x11CB7, // 11CB7..11CFF; UNKNOWN 6626 0x11D00, // 11D00..11D06; MASARAM_GONDI 6627 0x11D07, // 11D07 ; UNKNOWN 6628 0x11D08, // 11D08..11D09; MASARAM_GONDI 6629 0x11D0A, // 11D0A ; UNKNOWN 6630 0x11D0B, // 11D0B..11D36; MASARAM_GONDI 6631 0x11D37, // 11D37..11D39; UNKNOWN 6632 0x11D3A, // 11D3A ; MASARAM_GONDI 6633 0x11D3B, // 11D3B ; UNKNOWN 6634 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI 6635 0x11D3E, // 11D3E ; UNKNOWN 6636 0x11D3F, // 11D3F..11D47; MASARAM_GONDI 6637 0x11D48, // 11D48..11D4F; UNKNOWN 6638 0x11D50, // 11D50..11D59; MASARAM_GONDI 6639 0x11D5A, // 11D5A..11D5F; UNKNOWN 6640 0x11D60, // 11D60..11D65; GUNJALA_GONDI 6641 0x11D66, // 11D66 ; UNKNOWN 6642 0x11D67, // 11D67..11D68; GUNJALA_GONDI 6643 0x11D69, // 11D69 ; UNKNOWN 6644 0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI 6645 0x11D8F, // 11D8F ; UNKNOWN 6646 0x11D90, // 11D90..11D91; GUNJALA_GONDI 6647 0x11D92, // 11D92 ; UNKNOWN 6648 0x11D93, // 11D93..11D98; GUNJALA_GONDI 6649 0x11D99, // 11D99..11D9F; UNKNOWN 6650 0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI 6651 0x11DAA, // 11DAA..11EDF; UNKNOWN 6652 0x11EE0, // 11EE0..11EF8; MAKASAR 6653 0x11EF9, // 11EF9..11EFF; UNKNOWN 6654 0x11F00, // 11F00..11F10; KAWI 6655 0x11F11, // 11F11 ; UNKNOWN 6656 0x11F12, // 11F12..11F3A; KAWI 6657 0x11F3B, // 11F3B..11F3D; UNKNOWN 6658 0x11F3E, // 11F3E..11F59; KAWI 6659 0x11F5A, // 11F5A..11FAF; UNKNOWN 6660 0x11FB0, // 11FB0 ; LISU 6661 0x11FB1, // 11FB1..11FBF; UNKNOWN 6662 
0x11FC0, // 11FC0..11FF1; TAMIL 6663 0x11FF2, // 11FF2..11FFE; UNKNOWN 6664 0x11FFF, // 11FFF ; TAMIL 6665 0x12000, // 12000..12399; CUNEIFORM 6666 0x1239A, // 1239A..123FF; UNKNOWN 6667 0x12400, // 12400..1246E; CUNEIFORM 6668 0x1246F, // 1246F ; UNKNOWN 6669 0x12470, // 12470..12474; CUNEIFORM 6670 0x12475, // 12475..1247F; UNKNOWN 6671 0x12480, // 12480..12543; CUNEIFORM 6672 0x12544, // 12544..12F8F; UNKNOWN 6673 0x12F90, // 12F90..12FF2; CYPRO_MINOAN 6674 0x12FF3, // 12FF3..12FFF; UNKNOWN 6675 0x13000, // 13000..13455; EGYPTIAN_HIEROGLYPHS 6676 0x13456, // 13456..143FF; UNKNOWN 6677 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS 6678 0x14647, // 14647..167FF; UNKNOWN 6679 0x16800, // 16800..16A38; BAMUM 6680 0x16A39, // 16A39..16A3F; UNKNOWN 6681 0x16A40, // 16A40..16A5E; MRO 6682 0x16A5F, // 16A5F ; UNKNOWN 6683 0x16A60, // 16A60..16A69; MRO 6684 0x16A6A, // 16A6A..16A6D; UNKNOWN 6685 0x16A6E, // 16A6E..16A6F; MRO 6686 0x16A70, // 16A70..16ABE; TANGSA 6687 0x16ABF, // 16ABF ; UNKNOWN 6688 0x16AC0, // 16AC0..16AC9; TANGSA 6689 0x16ACA, // 16ACA..16ACF; UNKNOWN 6690 0x16AD0, // 16AD0..16AED; BASSA_VAH 6691 0x16AEE, // 16AEE..16AEF; UNKNOWN 6692 0x16AF0, // 16AF0..16AF5; BASSA_VAH 6693 0x16AF6, // 16AF6..16AFF; UNKNOWN 6694 0x16B00, // 16B00..16B45; PAHAWH_HMONG 6695 0x16B46, // 16B46..16B4F; UNKNOWN 6696 0x16B50, // 16B50..16B59; PAHAWH_HMONG 6697 0x16B5A, // 16B5A ; UNKNOWN 6698 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG 6699 0x16B62, // 16B62 ; UNKNOWN 6700 0x16B63, // 16B63..16B77; PAHAWH_HMONG 6701 0x16B78, // 16B78..16B7C; UNKNOWN 6702 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG 6703 0x16B90, // 16B90..16E3F; UNKNOWN 6704 0x16E40, // 16E40..16E9A; MEDEFAIDRIN 6705 0x16E9B, // 16E9B..16EFF; UNKNOWN 6706 0x16F00, // 16F00..16F4A; MIAO 6707 0x16F4B, // 16F4B..16F4E; UNKNOWN 6708 0x16F4F, // 16F4F..16F87; MIAO 6709 0x16F88, // 16F88..16F8E; UNKNOWN 6710 0x16F8F, // 16F8F..16F9F; MIAO 6711 0x16FA0, // 16FA0..16FDF; UNKNOWN 6712 0x16FE0, // 16FE0 ; TANGUT 6713 0x16FE1, 
// 16FE1 ; NUSHU 6714 0x16FE2, // 16FE2..16FE3; HAN 6715 0x16FE4, // 16FE4 ; KHITAN_SMALL_SCRIPT 6716 0x16FE5, // 16FE5..16FEF; UNKNOWN 6717 0x16FF0, // 16FF0..16FF1; HAN 6718 0x16FF2, // 16FF2..16FFF; UNKNOWN 6719 0x17000, // 17000..187F7; TANGUT 6720 0x187F8, // 187F8..187FF; UNKNOWN 6721 0x18800, // 18800..18AFF; TANGUT 6722 0x18B00, // 18B00..18CD5; KHITAN_SMALL_SCRIPT 6723 0x18CD6, // 18CD6..18CFF; UNKNOWN 6724 0x18D00, // 18D00..18D08; TANGUT 6725 0x18D09, // 18D09..1AFEF; UNKNOWN 6726 0x1AFF0, // 1AFF0..1AFF3; KATAKANA 6727 0x1AFF4, // 1AFF4 ; UNKNOWN 6728 0x1AFF5, // 1AFF5..1AFFB; KATAKANA 6729 0x1AFFC, // 1AFFC ; UNKNOWN 6730 0x1AFFD, // 1AFFD..1AFFE; KATAKANA 6731 0x1AFFF, // 1AFFF ; UNKNOWN 6732 0x1B000, // 1B000 ; KATAKANA 6733 0x1B001, // 1B001..1B11F; HIRAGANA 6734 0x1B120, // 1B120..1B122; KATAKANA 6735 0x1B123, // 1B123..1B131; UNKNOWN 6736 0x1B132, // 1B132 ; HIRAGANA 6737 0x1B133, // 1B133..1B14F; UNKNOWN 6738 0x1B150, // 1B150..1B152; HIRAGANA 6739 0x1B153, // 1B153..1B154; UNKNOWN 6740 0x1B155, // 1B155 ; KATAKANA 6741 0x1B156, // 1B156..1B163; UNKNOWN 6742 0x1B164, // 1B164..1B167; KATAKANA 6743 0x1B168, // 1B168..1B16F; UNKNOWN 6744 0x1B170, // 1B170..1B2FB; NUSHU 6745 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN 6746 0x1BC00, // 1BC00..1BC6A; DUPLOYAN 6747 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN 6748 0x1BC70, // 1BC70..1BC7C; DUPLOYAN 6749 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN 6750 0x1BC80, // 1BC80..1BC88; DUPLOYAN 6751 0x1BC89, // 1BC89..1BC8F; UNKNOWN 6752 0x1BC90, // 1BC90..1BC99; DUPLOYAN 6753 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN 6754 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN 6755 0x1BCA0, // 1BCA0..1BCA3; COMMON 6756 0x1BCA4, // 1BCA4..1CEFF; UNKNOWN 6757 0x1CF00, // 1CF00..1CF2D; INHERITED 6758 0x1CF2E, // 1CF2E..1CF2F; UNKNOWN 6759 0x1CF30, // 1CF30..1CF46; INHERITED 6760 0x1CF47, // 1CF47..1CF4F; UNKNOWN 6761 0x1CF50, // 1CF50..1CFC3; COMMON 6762 0x1CFC4, // 1CFC4..1CFFF; UNKNOWN 6763 0x1D000, // 1D000..1D0F5; COMMON 6764 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN 6765 
0x1D100, // 1D100..1D126; COMMON 6766 0x1D127, // 1D127..1D128; UNKNOWN 6767 0x1D129, // 1D129..1D166; COMMON 6768 0x1D167, // 1D167..1D169; INHERITED 6769 0x1D16A, // 1D16A..1D17A; COMMON 6770 0x1D17B, // 1D17B..1D182; INHERITED 6771 0x1D183, // 1D183..1D184; COMMON 6772 0x1D185, // 1D185..1D18B; INHERITED 6773 0x1D18C, // 1D18C..1D1A9; COMMON 6774 0x1D1AA, // 1D1AA..1D1AD; INHERITED 6775 0x1D1AE, // 1D1AE..1D1EA; COMMON 6776 0x1D1EB, // 1D1EB..1D1FF; UNKNOWN 6777 0x1D200, // 1D200..1D245; GREEK 6778 0x1D246, // 1D246..1D2BF; UNKNOWN 6779 0x1D2C0, // 1D2C0..1D2D3; COMMON 6780 0x1D2D4, // 1D2D4..1D2DF; UNKNOWN 6781 0x1D2E0, // 1D2E0..1D2F3; COMMON 6782 0x1D2F4, // 1D2F4..1D2FF; UNKNOWN 6783 0x1D300, // 1D300..1D356; COMMON 6784 0x1D357, // 1D357..1D35F; UNKNOWN 6785 0x1D360, // 1D360..1D378; COMMON 6786 0x1D379, // 1D379..1D3FF; UNKNOWN 6787 0x1D400, // 1D400..1D454; COMMON 6788 0x1D455, // 1D455 ; UNKNOWN 6789 0x1D456, // 1D456..1D49C; COMMON 6790 0x1D49D, // 1D49D ; UNKNOWN 6791 0x1D49E, // 1D49E..1D49F; COMMON 6792 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN 6793 0x1D4A2, // 1D4A2 ; COMMON 6794 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN 6795 0x1D4A5, // 1D4A5..1D4A6; COMMON 6796 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN 6797 0x1D4A9, // 1D4A9..1D4AC; COMMON 6798 0x1D4AD, // 1D4AD ; UNKNOWN 6799 0x1D4AE, // 1D4AE..1D4B9; COMMON 6800 0x1D4BA, // 1D4BA ; UNKNOWN 6801 0x1D4BB, // 1D4BB ; COMMON 6802 0x1D4BC, // 1D4BC ; UNKNOWN 6803 0x1D4BD, // 1D4BD..1D4C3; COMMON 6804 0x1D4C4, // 1D4C4 ; UNKNOWN 6805 0x1D4C5, // 1D4C5..1D505; COMMON 6806 0x1D506, // 1D506 ; UNKNOWN 6807 0x1D507, // 1D507..1D50A; COMMON 6808 0x1D50B, // 1D50B..1D50C; UNKNOWN 6809 0x1D50D, // 1D50D..1D514; COMMON 6810 0x1D515, // 1D515 ; UNKNOWN 6811 0x1D516, // 1D516..1D51C; COMMON 6812 0x1D51D, // 1D51D ; UNKNOWN 6813 0x1D51E, // 1D51E..1D539; COMMON 6814 0x1D53A, // 1D53A ; UNKNOWN 6815 0x1D53B, // 1D53B..1D53E; COMMON 6816 0x1D53F, // 1D53F ; UNKNOWN 6817 0x1D540, // 1D540..1D544; COMMON 6818 0x1D545, // 1D545 ; UNKNOWN 
6819 0x1D546, // 1D546 ; COMMON 6820 0x1D547, // 1D547..1D549; UNKNOWN 6821 0x1D54A, // 1D54A..1D550; COMMON 6822 0x1D551, // 1D551 ; UNKNOWN 6823 0x1D552, // 1D552..1D6A5; COMMON 6824 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN 6825 0x1D6A8, // 1D6A8..1D7CB; COMMON 6826 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN 6827 0x1D7CE, // 1D7CE..1D7FF; COMMON 6828 0x1D800, // 1D800..1DA8B; SIGNWRITING 6829 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN 6830 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING 6831 0x1DAA0, // 1DAA0 ; UNKNOWN 6832 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING 6833 0x1DAB0, // 1DAB0..1DEFF; UNKNOWN 6834 0x1DF00, // 1DF00..1DF1E; LATIN 6835 0x1DF1F, // 1DF1F..1DF24; UNKNOWN 6836 0x1DF25, // 1DF25..1DF2A; LATIN 6837 0x1DF2B, // 1DF2B..1DFFF; UNKNOWN 6838 0x1E000, // 1E000..1E006; GLAGOLITIC 6839 0x1E007, // 1E007 ; UNKNOWN 6840 0x1E008, // 1E008..1E018; GLAGOLITIC 6841 0x1E019, // 1E019..1E01A; UNKNOWN 6842 0x1E01B, // 1E01B..1E021; GLAGOLITIC 6843 0x1E022, // 1E022 ; UNKNOWN 6844 0x1E023, // 1E023..1E024; GLAGOLITIC 6845 0x1E025, // 1E025 ; UNKNOWN 6846 0x1E026, // 1E026..1E02A; GLAGOLITIC 6847 0x1E02B, // 1E02B..1E02F; UNKNOWN 6848 0x1E030, // 1E030..1E06D; CYRILLIC 6849 0x1E06E, // 1E06E..1E08E; UNKNOWN 6850 0x1E08F, // 1E08F ; CYRILLIC 6851 0x1E090, // 1E090..1E0FF; UNKNOWN 6852 0x1E100, // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG 6853 0x1E12D, // 1E12D..1E12F; UNKNOWN 6854 0x1E130, // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG 6855 0x1E13E, // 1E13E..1E13F; UNKNOWN 6856 0x1E140, // 1E140..1E149; NYIAKENG_PUACHUE_HMONG 6857 0x1E14A, // 1E14A..1E14D; UNKNOWN 6858 0x1E14E, // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG 6859 0x1E150, // 1E150..1E28F; UNKNOWN 6860 0x1E290, // 1E290..1E2AE; TOTO 6861 0x1E2AF, // 1E2AF..1E2BF; UNKNOWN 6862 0x1E2C0, // 1E2C0..1E2F9; WANCHO 6863 0x1E2FA, // 1E2FA..1E2FE; UNKNOWN 6864 0x1E2FF, // 1E2FF ; WANCHO 6865 0x1E300, // 1E300..1E4CF; UNKNOWN 6866 0x1E4D0, // 1E4D0..1E4F9; NAG_MUNDARI 6867 0x1E4FA, // 1E4FA..1E7DF; UNKNOWN 6868 0x1E7E0, // 1E7E0..1E7E6; ETHIOPIC 6869 0x1E7E7, // 
1E7E7 ; UNKNOWN 6870 0x1E7E8, // 1E7E8..1E7EB; ETHIOPIC 6871 0x1E7EC, // 1E7EC ; UNKNOWN 6872 0x1E7ED, // 1E7ED..1E7EE; ETHIOPIC 6873 0x1E7EF, // 1E7EF ; UNKNOWN 6874 0x1E7F0, // 1E7F0..1E7FE; ETHIOPIC 6875 0x1E7FF, // 1E7FF ; UNKNOWN 6876 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI 6877 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN 6878 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI 6879 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN 6880 0x1E900, // 1E900..1E94B; ADLAM 6881 0x1E94C, // 1E94C..1E94F; UNKNOWN 6882 0x1E950, // 1E950..1E959; ADLAM 6883 0x1E95A, // 1E95A..1E95D; UNKNOWN 6884 0x1E95E, // 1E95E..1E95F; ADLAM 6885 0x1E960, // 1E960..1EC70; UNKNOWN 6886 0x1EC71, // 1EC71..1ECB4; COMMON 6887 0x1ECB5, // 1ECB5..1ED00; UNKNOWN 6888 0x1ED01, // 1ED01..1ED3D; COMMON 6889 0x1ED3E, // 1ED3E..1EDFF; UNKNOWN 6890 0x1EE00, // 1EE00..1EE03; ARABIC 6891 0x1EE04, // 1EE04 ; UNKNOWN 6892 0x1EE05, // 1EE05..1EE1F; ARABIC 6893 0x1EE20, // 1EE20 ; UNKNOWN 6894 0x1EE21, // 1EE21..1EE22; ARABIC 6895 0x1EE23, // 1EE23 ; UNKNOWN 6896 0x1EE24, // 1EE24 ; ARABIC 6897 0x1EE25, // 1EE25..1EE26; UNKNOWN 6898 0x1EE27, // 1EE27 ; ARABIC 6899 0x1EE28, // 1EE28 ; UNKNOWN 6900 0x1EE29, // 1EE29..1EE32; ARABIC 6901 0x1EE33, // 1EE33 ; UNKNOWN 6902 0x1EE34, // 1EE34..1EE37; ARABIC 6903 0x1EE38, // 1EE38 ; UNKNOWN 6904 0x1EE39, // 1EE39 ; ARABIC 6905 0x1EE3A, // 1EE3A ; UNKNOWN 6906 0x1EE3B, // 1EE3B ; ARABIC 6907 0x1EE3C, // 1EE3C..1EE41; UNKNOWN 6908 0x1EE42, // 1EE42 ; ARABIC 6909 0x1EE43, // 1EE43..1EE46; UNKNOWN 6910 0x1EE47, // 1EE47 ; ARABIC 6911 0x1EE48, // 1EE48 ; UNKNOWN 6912 0x1EE49, // 1EE49 ; ARABIC 6913 0x1EE4A, // 1EE4A ; UNKNOWN 6914 0x1EE4B, // 1EE4B ; ARABIC 6915 0x1EE4C, // 1EE4C ; UNKNOWN 6916 0x1EE4D, // 1EE4D..1EE4F; ARABIC 6917 0x1EE50, // 1EE50 ; UNKNOWN 6918 0x1EE51, // 1EE51..1EE52; ARABIC 6919 0x1EE53, // 1EE53 ; UNKNOWN 6920 0x1EE54, // 1EE54 ; ARABIC 6921 0x1EE55, // 1EE55..1EE56; UNKNOWN 6922 0x1EE57, // 1EE57 ; ARABIC 6923 0x1EE58, // 1EE58 ; UNKNOWN 6924 0x1EE59, // 1EE59 ; ARABIC 6925 
0x1EE5A, // 1EE5A ; UNKNOWN 6926 0x1EE5B, // 1EE5B ; ARABIC 6927 0x1EE5C, // 1EE5C ; UNKNOWN 6928 0x1EE5D, // 1EE5D ; ARABIC 6929 0x1EE5E, // 1EE5E ; UNKNOWN 6930 0x1EE5F, // 1EE5F ; ARABIC 6931 0x1EE60, // 1EE60 ; UNKNOWN 6932 0x1EE61, // 1EE61..1EE62; ARABIC 6933 0x1EE63, // 1EE63 ; UNKNOWN 6934 0x1EE64, // 1EE64 ; ARABIC 6935 0x1EE65, // 1EE65..1EE66; UNKNOWN 6936 0x1EE67, // 1EE67..1EE6A; ARABIC 6937 0x1EE6B, // 1EE6B ; UNKNOWN 6938 0x1EE6C, // 1EE6C..1EE72; ARABIC 6939 0x1EE73, // 1EE73 ; UNKNOWN 6940 0x1EE74, // 1EE74..1EE77; ARABIC 6941 0x1EE78, // 1EE78 ; UNKNOWN 6942 0x1EE79, // 1EE79..1EE7C; ARABIC 6943 0x1EE7D, // 1EE7D ; UNKNOWN 6944 0x1EE7E, // 1EE7E ; ARABIC 6945 0x1EE7F, // 1EE7F ; UNKNOWN 6946 0x1EE80, // 1EE80..1EE89; ARABIC 6947 0x1EE8A, // 1EE8A ; UNKNOWN 6948 0x1EE8B, // 1EE8B..1EE9B; ARABIC 6949 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN 6950 0x1EEA1, // 1EEA1..1EEA3; ARABIC 6951 0x1EEA4, // 1EEA4 ; UNKNOWN 6952 0x1EEA5, // 1EEA5..1EEA9; ARABIC 6953 0x1EEAA, // 1EEAA ; UNKNOWN 6954 0x1EEAB, // 1EEAB..1EEBB; ARABIC 6955 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN 6956 0x1EEF0, // 1EEF0..1EEF1; ARABIC 6957 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN 6958 0x1F000, // 1F000..1F02B; COMMON 6959 0x1F02C, // 1F02C..1F02F; UNKNOWN 6960 0x1F030, // 1F030..1F093; COMMON 6961 0x1F094, // 1F094..1F09F; UNKNOWN 6962 0x1F0A0, // 1F0A0..1F0AE; COMMON 6963 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN 6964 0x1F0B1, // 1F0B1..1F0BF; COMMON 6965 0x1F0C0, // 1F0C0 ; UNKNOWN 6966 0x1F0C1, // 1F0C1..1F0CF; COMMON 6967 0x1F0D0, // 1F0D0 ; UNKNOWN 6968 0x1F0D1, // 1F0D1..1F0F5; COMMON 6969 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN 6970 0x1F100, // 1F100..1F1AD; COMMON 6971 0x1F1AE, // 1F1AE..1F1E5; UNKNOWN 6972 0x1F1E6, // 1F1E6..1F1FF; COMMON 6973 0x1F200, // 1F200 ; HIRAGANA 6974 0x1F201, // 1F201..1F202; COMMON 6975 0x1F203, // 1F203..1F20F; UNKNOWN 6976 0x1F210, // 1F210..1F23B; COMMON 6977 0x1F23C, // 1F23C..1F23F; UNKNOWN 6978 0x1F240, // 1F240..1F248; COMMON 6979 0x1F249, // 1F249..1F24F; UNKNOWN 6980 
0x1F250, // 1F250..1F251; COMMON 6981 0x1F252, // 1F252..1F25F; UNKNOWN 6982 0x1F260, // 1F260..1F265; COMMON 6983 0x1F266, // 1F266..1F2FF; UNKNOWN 6984 0x1F300, // 1F300..1F6D7; COMMON 6985 0x1F6D8, // 1F6D8..1F6DB; UNKNOWN 6986 0x1F6DC, // 1F6DC..1F6EC; COMMON 6987 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN 6988 0x1F6F0, // 1F6F0..1F6FC; COMMON 6989 0x1F6FD, // 1F6FD..1F6FF; UNKNOWN 6990 0x1F700, // 1F700..1F776; COMMON 6991 0x1F777, // 1F777..1F77A; UNKNOWN 6992 0x1F77B, // 1F77B..1F7D9; COMMON 6993 0x1F7DA, // 1F7DA..1F7DF; UNKNOWN 6994 0x1F7E0, // 1F7E0..1F7EB; COMMON 6995 0x1F7EC, // 1F7EC..1F7EF; UNKNOWN 6996 0x1F7F0, // 1F7F0 ; COMMON 6997 0x1F7F1, // 1F7F1..1F7FF; UNKNOWN 6998 0x1F800, // 1F800..1F80B; COMMON 6999 0x1F80C, // 1F80C..1F80F; UNKNOWN 7000 0x1F810, // 1F810..1F847; COMMON 7001 0x1F848, // 1F848..1F84F; UNKNOWN 7002 0x1F850, // 1F850..1F859; COMMON 7003 0x1F85A, // 1F85A..1F85F; UNKNOWN 7004 0x1F860, // 1F860..1F887; COMMON 7005 0x1F888, // 1F888..1F88F; UNKNOWN 7006 0x1F890, // 1F890..1F8AD; COMMON 7007 0x1F8AE, // 1F8AE..1F8AF; UNKNOWN 7008 0x1F8B0, // 1F8B0..1F8B1; COMMON 7009 0x1F8B2, // 1F8B2..1F8FF; UNKNOWN 7010 0x1F900, // 1F900..1FA53; COMMON 7011 0x1FA54, // 1FA54..1FA5F; UNKNOWN 7012 0x1FA60, // 1FA60..1FA6D; COMMON 7013 0x1FA6E, // 1FA6E..1FA6F; UNKNOWN 7014 0x1FA70, // 1FA70..1FA7C; COMMON 7015 0x1FA7D, // 1FA7D..1FA7F; UNKNOWN 7016 0x1FA80, // 1FA80..1FA88; COMMON 7017 0x1FA89, // 1FA89..1FA8F; UNKNOWN 7018 0x1FA90, // 1FA90..1FABD; COMMON 7019 0x1FABE, // 1FABE ; UNKNOWN 7020 0x1FABF, // 1FABF..1FAC5; COMMON 7021 0x1FAC6, // 1FAC6..1FACD; UNKNOWN 7022 0x1FACE, // 1FACE..1FADB; COMMON 7023 0x1FADC, // 1FADC..1FADF; UNKNOWN 7024 0x1FAE0, // 1FAE0..1FAE8; COMMON 7025 0x1FAE9, // 1FAE9..1FAEF; UNKNOWN 7026 0x1FAF0, // 1FAF0..1FAF8; COMMON 7027 0x1FAF9, // 1FAF9..1FAFF; UNKNOWN 7028 0x1FB00, // 1FB00..1FB92; COMMON 7029 0x1FB93, // 1FB93 ; UNKNOWN 7030 0x1FB94, // 1FB94..1FBCA; COMMON 7031 0x1FBCB, // 1FBCB..1FBEF; UNKNOWN 7032 0x1FBF0, // 
1FBF0..1FBF9; COMMON 7033 0x1FBFA, // 1FBFA..1FFFF; UNKNOWN 7034 0x20000, // 20000..2A6DF; HAN 7035 0x2A6E0, // 2A6E0..2A6FF; UNKNOWN 7036 0x2A700, // 2A700..2B739; HAN 7037 0x2B73A, // 2B73A..2B73F; UNKNOWN 7038 0x2B740, // 2B740..2B81D; HAN 7039 0x2B81E, // 2B81E..2B81F; UNKNOWN 7040 0x2B820, // 2B820..2CEA1; HAN 7041 0x2CEA2, // 2CEA2..2CEAF; UNKNOWN 7042 0x2CEB0, // 2CEB0..2EBE0; HAN 7043 0x2EBE1, // 2EBE1..2EBEF; UNKNOWN 7044 0x2EBF0, // 2EBF0..2EE5D; HAN 7045 0x2EE5E, // 2EE5E..2F7FF; UNKNOWN 7046 0x2F800, // 2F800..2FA1D; HAN 7047 0x2FA1E, // 2FA1E..2FFFF; UNKNOWN 7048 0x30000, // 30000..3134A; HAN 7049 0x3134B, // 3134B..3134F; UNKNOWN 7050 0x31350, // 31350..323AF; HAN 7051 0x323B0, // 323B0..E0000; UNKNOWN 7052 0xE0001, // E0001 ; COMMON 7053 0xE0002, // E0002..E001F; UNKNOWN 7054 0xE0020, // E0020..E007F; COMMON 7055 0xE0080, // E0080..E00FF; UNKNOWN 7056 0xE0100, // E0100..E01EF; INHERITED 7057 0xE01F0, // E01F0..10FFFF; UNKNOWN 7058 }; 7059 7060 private static final UnicodeScript[] scripts = { 7061 COMMON, // 0000..0040 7062 LATIN, // 0041..005A 7063 COMMON, // 005B..0060 7064 LATIN, // 0061..007A 7065 COMMON, // 007B..00A9 7066 LATIN, // 00AA 7067 COMMON, // 00AB..00B9 7068 LATIN, // 00BA 7069 COMMON, // 00BB..00BF 7070 LATIN, // 00C0..00D6 7071 COMMON, // 00D7 7072 LATIN, // 00D8..00F6 7073 COMMON, // 00F7 7074 LATIN, // 00F8..02B8 7075 COMMON, // 02B9..02DF 7076 LATIN, // 02E0..02E4 7077 COMMON, // 02E5..02E9 7078 BOPOMOFO, // 02EA..02EB 7079 COMMON, // 02EC..02FF 7080 INHERITED, // 0300..036F 7081 GREEK, // 0370..0373 7082 COMMON, // 0374 7083 GREEK, // 0375..0377 7084 UNKNOWN, // 0378..0379 7085 GREEK, // 037A..037D 7086 COMMON, // 037E 7087 GREEK, // 037F 7088 UNKNOWN, // 0380..0383 7089 GREEK, // 0384 7090 COMMON, // 0385 7091 GREEK, // 0386 7092 COMMON, // 0387 7093 GREEK, // 0388..038A 7094 UNKNOWN, // 038B 7095 GREEK, // 038C 7096 UNKNOWN, // 038D 7097 GREEK, // 038E..03A1 7098 UNKNOWN, // 03A2 7099 GREEK, // 03A3..03E1 7100 COPTIC, // 
03E2..03EF 7101 GREEK, // 03F0..03FF 7102 CYRILLIC, // 0400..0484 7103 INHERITED, // 0485..0486 7104 CYRILLIC, // 0487..052F 7105 UNKNOWN, // 0530 7106 ARMENIAN, // 0531..0556 7107 UNKNOWN, // 0557..0558 7108 ARMENIAN, // 0559..058A 7109 UNKNOWN, // 058B..058C 7110 ARMENIAN, // 058D..058F 7111 UNKNOWN, // 0590 7112 HEBREW, // 0591..05C7 7113 UNKNOWN, // 05C8..05CF 7114 HEBREW, // 05D0..05EA 7115 UNKNOWN, // 05EB..05EE 7116 HEBREW, // 05EF..05F4 7117 UNKNOWN, // 05F5..05FF 7118 ARABIC, // 0600..0604 7119 COMMON, // 0605 7120 ARABIC, // 0606..060B 7121 COMMON, // 060C 7122 ARABIC, // 060D..061A 7123 COMMON, // 061B 7124 ARABIC, // 061C..061E 7125 COMMON, // 061F 7126 ARABIC, // 0620..063F 7127 COMMON, // 0640 7128 ARABIC, // 0641..064A 7129 INHERITED, // 064B..0655 7130 ARABIC, // 0656..066F 7131 INHERITED, // 0670 7132 ARABIC, // 0671..06DC 7133 COMMON, // 06DD 7134 ARABIC, // 06DE..06FF 7135 SYRIAC, // 0700..070D 7136 UNKNOWN, // 070E 7137 SYRIAC, // 070F..074A 7138 UNKNOWN, // 074B..074C 7139 SYRIAC, // 074D..074F 7140 ARABIC, // 0750..077F 7141 THAANA, // 0780..07B1 7142 UNKNOWN, // 07B2..07BF 7143 NKO, // 07C0..07FA 7144 UNKNOWN, // 07FB..07FC 7145 NKO, // 07FD..07FF 7146 SAMARITAN, // 0800..082D 7147 UNKNOWN, // 082E..082F 7148 SAMARITAN, // 0830..083E 7149 UNKNOWN, // 083F 7150 MANDAIC, // 0840..085B 7151 UNKNOWN, // 085C..085D 7152 MANDAIC, // 085E 7153 UNKNOWN, // 085F 7154 SYRIAC, // 0860..086A 7155 UNKNOWN, // 086B..086F 7156 ARABIC, // 0870..088E 7157 UNKNOWN, // 088F 7158 ARABIC, // 0890..0891 7159 UNKNOWN, // 0892..0897 7160 ARABIC, // 0898..08E1 7161 COMMON, // 08E2 7162 ARABIC, // 08E3..08FF 7163 DEVANAGARI, // 0900..0950 7164 INHERITED, // 0951..0954 7165 DEVANAGARI, // 0955..0963 7166 COMMON, // 0964..0965 7167 DEVANAGARI, // 0966..097F 7168 BENGALI, // 0980..0983 7169 UNKNOWN, // 0984 7170 BENGALI, // 0985..098C 7171 UNKNOWN, // 098D..098E 7172 BENGALI, // 098F..0990 7173 UNKNOWN, // 0991..0992 7174 BENGALI, // 0993..09A8 7175 UNKNOWN, // 09A9 7176 
BENGALI, // 09AA..09B0 7177 UNKNOWN, // 09B1 7178 BENGALI, // 09B2 7179 UNKNOWN, // 09B3..09B5 7180 BENGALI, // 09B6..09B9 7181 UNKNOWN, // 09BA..09BB 7182 BENGALI, // 09BC..09C4 7183 UNKNOWN, // 09C5..09C6 7184 BENGALI, // 09C7..09C8 7185 UNKNOWN, // 09C9..09CA 7186 BENGALI, // 09CB..09CE 7187 UNKNOWN, // 09CF..09D6 7188 BENGALI, // 09D7 7189 UNKNOWN, // 09D8..09DB 7190 BENGALI, // 09DC..09DD 7191 UNKNOWN, // 09DE 7192 BENGALI, // 09DF..09E3 7193 UNKNOWN, // 09E4..09E5 7194 BENGALI, // 09E6..09FE 7195 UNKNOWN, // 09FF..0A00 7196 GURMUKHI, // 0A01..0A03 7197 UNKNOWN, // 0A04 7198 GURMUKHI, // 0A05..0A0A 7199 UNKNOWN, // 0A0B..0A0E 7200 GURMUKHI, // 0A0F..0A10 7201 UNKNOWN, // 0A11..0A12 7202 GURMUKHI, // 0A13..0A28 7203 UNKNOWN, // 0A29 7204 GURMUKHI, // 0A2A..0A30 7205 UNKNOWN, // 0A31 7206 GURMUKHI, // 0A32..0A33 7207 UNKNOWN, // 0A34 7208 GURMUKHI, // 0A35..0A36 7209 UNKNOWN, // 0A37 7210 GURMUKHI, // 0A38..0A39 7211 UNKNOWN, // 0A3A..0A3B 7212 GURMUKHI, // 0A3C 7213 UNKNOWN, // 0A3D 7214 GURMUKHI, // 0A3E..0A42 7215 UNKNOWN, // 0A43..0A46 7216 GURMUKHI, // 0A47..0A48 7217 UNKNOWN, // 0A49..0A4A 7218 GURMUKHI, // 0A4B..0A4D 7219 UNKNOWN, // 0A4E..0A50 7220 GURMUKHI, // 0A51 7221 UNKNOWN, // 0A52..0A58 7222 GURMUKHI, // 0A59..0A5C 7223 UNKNOWN, // 0A5D 7224 GURMUKHI, // 0A5E 7225 UNKNOWN, // 0A5F..0A65 7226 GURMUKHI, // 0A66..0A76 7227 UNKNOWN, // 0A77..0A80 7228 GUJARATI, // 0A81..0A83 7229 UNKNOWN, // 0A84 7230 GUJARATI, // 0A85..0A8D 7231 UNKNOWN, // 0A8E 7232 GUJARATI, // 0A8F..0A91 7233 UNKNOWN, // 0A92 7234 GUJARATI, // 0A93..0AA8 7235 UNKNOWN, // 0AA9 7236 GUJARATI, // 0AAA..0AB0 7237 UNKNOWN, // 0AB1 7238 GUJARATI, // 0AB2..0AB3 7239 UNKNOWN, // 0AB4 7240 GUJARATI, // 0AB5..0AB9 7241 UNKNOWN, // 0ABA..0ABB 7242 GUJARATI, // 0ABC..0AC5 7243 UNKNOWN, // 0AC6 7244 GUJARATI, // 0AC7..0AC9 7245 UNKNOWN, // 0ACA 7246 GUJARATI, // 0ACB..0ACD 7247 UNKNOWN, // 0ACE..0ACF 7248 GUJARATI, // 0AD0 7249 UNKNOWN, // 0AD1..0ADF 7250 GUJARATI, // 0AE0..0AE3 7251 UNKNOWN, 
// 0AE4..0AE5 7252 GUJARATI, // 0AE6..0AF1 7253 UNKNOWN, // 0AF2..0AF8 7254 GUJARATI, // 0AF9..0AFF 7255 UNKNOWN, // 0B00 7256 ORIYA, // 0B01..0B03 7257 UNKNOWN, // 0B04 7258 ORIYA, // 0B05..0B0C 7259 UNKNOWN, // 0B0D..0B0E 7260 ORIYA, // 0B0F..0B10 7261 UNKNOWN, // 0B11..0B12 7262 ORIYA, // 0B13..0B28 7263 UNKNOWN, // 0B29 7264 ORIYA, // 0B2A..0B30 7265 UNKNOWN, // 0B31 7266 ORIYA, // 0B32..0B33 7267 UNKNOWN, // 0B34 7268 ORIYA, // 0B35..0B39 7269 UNKNOWN, // 0B3A..0B3B 7270 ORIYA, // 0B3C..0B44 7271 UNKNOWN, // 0B45..0B46 7272 ORIYA, // 0B47..0B48 7273 UNKNOWN, // 0B49..0B4A 7274 ORIYA, // 0B4B..0B4D 7275 UNKNOWN, // 0B4E..0B54 7276 ORIYA, // 0B55..0B57 7277 UNKNOWN, // 0B58..0B5B 7278 ORIYA, // 0B5C..0B5D 7279 UNKNOWN, // 0B5E 7280 ORIYA, // 0B5F..0B63 7281 UNKNOWN, // 0B64..0B65 7282 ORIYA, // 0B66..0B77 7283 UNKNOWN, // 0B78..0B81 7284 TAMIL, // 0B82..0B83 7285 UNKNOWN, // 0B84 7286 TAMIL, // 0B85..0B8A 7287 UNKNOWN, // 0B8B..0B8D 7288 TAMIL, // 0B8E..0B90 7289 UNKNOWN, // 0B91 7290 TAMIL, // 0B92..0B95 7291 UNKNOWN, // 0B96..0B98 7292 TAMIL, // 0B99..0B9A 7293 UNKNOWN, // 0B9B 7294 TAMIL, // 0B9C 7295 UNKNOWN, // 0B9D 7296 TAMIL, // 0B9E..0B9F 7297 UNKNOWN, // 0BA0..0BA2 7298 TAMIL, // 0BA3..0BA4 7299 UNKNOWN, // 0BA5..0BA7 7300 TAMIL, // 0BA8..0BAA 7301 UNKNOWN, // 0BAB..0BAD 7302 TAMIL, // 0BAE..0BB9 7303 UNKNOWN, // 0BBA..0BBD 7304 TAMIL, // 0BBE..0BC2 7305 UNKNOWN, // 0BC3..0BC5 7306 TAMIL, // 0BC6..0BC8 7307 UNKNOWN, // 0BC9 7308 TAMIL, // 0BCA..0BCD 7309 UNKNOWN, // 0BCE..0BCF 7310 TAMIL, // 0BD0 7311 UNKNOWN, // 0BD1..0BD6 7312 TAMIL, // 0BD7 7313 UNKNOWN, // 0BD8..0BE5 7314 TAMIL, // 0BE6..0BFA 7315 UNKNOWN, // 0BFB..0BFF 7316 TELUGU, // 0C00..0C0C 7317 UNKNOWN, // 0C0D 7318 TELUGU, // 0C0E..0C10 7319 UNKNOWN, // 0C11 7320 TELUGU, // 0C12..0C28 7321 UNKNOWN, // 0C29 7322 TELUGU, // 0C2A..0C39 7323 UNKNOWN, // 0C3A..0C3B 7324 TELUGU, // 0C3C..0C44 7325 UNKNOWN, // 0C45 7326 TELUGU, // 0C46..0C48 7327 UNKNOWN, // 0C49 7328 TELUGU, // 0C4A..0C4D 7329 
UNKNOWN, // 0C4E..0C54 7330 TELUGU, // 0C55..0C56 7331 UNKNOWN, // 0C57 7332 TELUGU, // 0C58..0C5A 7333 UNKNOWN, // 0C5B..0C5C 7334 TELUGU, // 0C5D 7335 UNKNOWN, // 0C5E..0C5F 7336 TELUGU, // 0C60..0C63 7337 UNKNOWN, // 0C64..0C65 7338 TELUGU, // 0C66..0C6F 7339 UNKNOWN, // 0C70..0C76 7340 TELUGU, // 0C77..0C7F 7341 KANNADA, // 0C80..0C8C 7342 UNKNOWN, // 0C8D 7343 KANNADA, // 0C8E..0C90 7344 UNKNOWN, // 0C91 7345 KANNADA, // 0C92..0CA8 7346 UNKNOWN, // 0CA9 7347 KANNADA, // 0CAA..0CB3 7348 UNKNOWN, // 0CB4 7349 KANNADA, // 0CB5..0CB9 7350 UNKNOWN, // 0CBA..0CBB 7351 KANNADA, // 0CBC..0CC4 7352 UNKNOWN, // 0CC5 7353 KANNADA, // 0CC6..0CC8 7354 UNKNOWN, // 0CC9 7355 KANNADA, // 0CCA..0CCD 7356 UNKNOWN, // 0CCE..0CD4 7357 KANNADA, // 0CD5..0CD6 7358 UNKNOWN, // 0CD7..0CDC 7359 KANNADA, // 0CDD..0CDE 7360 UNKNOWN, // 0CDF 7361 KANNADA, // 0CE0..0CE3 7362 UNKNOWN, // 0CE4..0CE5 7363 KANNADA, // 0CE6..0CEF 7364 UNKNOWN, // 0CF0 7365 KANNADA, // 0CF1..0CF3 7366 UNKNOWN, // 0CF4..0CFF 7367 MALAYALAM, // 0D00..0D0C 7368 UNKNOWN, // 0D0D 7369 MALAYALAM, // 0D0E..0D10 7370 UNKNOWN, // 0D11 7371 MALAYALAM, // 0D12..0D44 7372 UNKNOWN, // 0D45 7373 MALAYALAM, // 0D46..0D48 7374 UNKNOWN, // 0D49 7375 MALAYALAM, // 0D4A..0D4F 7376 UNKNOWN, // 0D50..0D53 7377 MALAYALAM, // 0D54..0D63 7378 UNKNOWN, // 0D64..0D65 7379 MALAYALAM, // 0D66..0D7F 7380 UNKNOWN, // 0D80 7381 SINHALA, // 0D81..0D83 7382 UNKNOWN, // 0D84 7383 SINHALA, // 0D85..0D96 7384 UNKNOWN, // 0D97..0D99 7385 SINHALA, // 0D9A..0DB1 7386 UNKNOWN, // 0DB2 7387 SINHALA, // 0DB3..0DBB 7388 UNKNOWN, // 0DBC 7389 SINHALA, // 0DBD 7390 UNKNOWN, // 0DBE..0DBF 7391 SINHALA, // 0DC0..0DC6 7392 UNKNOWN, // 0DC7..0DC9 7393 SINHALA, // 0DCA 7394 UNKNOWN, // 0DCB..0DCE 7395 SINHALA, // 0DCF..0DD4 7396 UNKNOWN, // 0DD5 7397 SINHALA, // 0DD6 7398 UNKNOWN, // 0DD7 7399 SINHALA, // 0DD8..0DDF 7400 UNKNOWN, // 0DE0..0DE5 7401 SINHALA, // 0DE6..0DEF 7402 UNKNOWN, // 0DF0..0DF1 7403 SINHALA, // 0DF2..0DF4 7404 UNKNOWN, // 0DF5..0E00 7405 
THAI, // 0E01..0E3A 7406 UNKNOWN, // 0E3B..0E3E 7407 COMMON, // 0E3F 7408 THAI, // 0E40..0E5B 7409 UNKNOWN, // 0E5C..0E80 7410 LAO, // 0E81..0E82 7411 UNKNOWN, // 0E83 7412 LAO, // 0E84 7413 UNKNOWN, // 0E85 7414 LAO, // 0E86..0E8A 7415 UNKNOWN, // 0E8B 7416 LAO, // 0E8C..0EA3 7417 UNKNOWN, // 0EA4 7418 LAO, // 0EA5 7419 UNKNOWN, // 0EA6 7420 LAO, // 0EA7..0EBD 7421 UNKNOWN, // 0EBE..0EBF 7422 LAO, // 0EC0..0EC4 7423 UNKNOWN, // 0EC5 7424 LAO, // 0EC6 7425 UNKNOWN, // 0EC7 7426 LAO, // 0EC8..0ECE 7427 UNKNOWN, // 0ECF 7428 LAO, // 0ED0..0ED9 7429 UNKNOWN, // 0EDA..0EDB 7430 LAO, // 0EDC..0EDF 7431 UNKNOWN, // 0EE0..0EFF 7432 TIBETAN, // 0F00..0F47 7433 UNKNOWN, // 0F48 7434 TIBETAN, // 0F49..0F6C 7435 UNKNOWN, // 0F6D..0F70 7436 TIBETAN, // 0F71..0F97 7437 UNKNOWN, // 0F98 7438 TIBETAN, // 0F99..0FBC 7439 UNKNOWN, // 0FBD 7440 TIBETAN, // 0FBE..0FCC 7441 UNKNOWN, // 0FCD 7442 TIBETAN, // 0FCE..0FD4 7443 COMMON, // 0FD5..0FD8 7444 TIBETAN, // 0FD9..0FDA 7445 UNKNOWN, // 0FDB..0FFF 7446 MYANMAR, // 1000..109F 7447 GEORGIAN, // 10A0..10C5 7448 UNKNOWN, // 10C6 7449 GEORGIAN, // 10C7 7450 UNKNOWN, // 10C8..10CC 7451 GEORGIAN, // 10CD 7452 UNKNOWN, // 10CE..10CF 7453 GEORGIAN, // 10D0..10FA 7454 COMMON, // 10FB 7455 GEORGIAN, // 10FC..10FF 7456 HANGUL, // 1100..11FF 7457 ETHIOPIC, // 1200..1248 7458 UNKNOWN, // 1249 7459 ETHIOPIC, // 124A..124D 7460 UNKNOWN, // 124E..124F 7461 ETHIOPIC, // 1250..1256 7462 UNKNOWN, // 1257 7463 ETHIOPIC, // 1258 7464 UNKNOWN, // 1259 7465 ETHIOPIC, // 125A..125D 7466 UNKNOWN, // 125E..125F 7467 ETHIOPIC, // 1260..1288 7468 UNKNOWN, // 1289 7469 ETHIOPIC, // 128A..128D 7470 UNKNOWN, // 128E..128F 7471 ETHIOPIC, // 1290..12B0 7472 UNKNOWN, // 12B1 7473 ETHIOPIC, // 12B2..12B5 7474 UNKNOWN, // 12B6..12B7 7475 ETHIOPIC, // 12B8..12BE 7476 UNKNOWN, // 12BF 7477 ETHIOPIC, // 12C0 7478 UNKNOWN, // 12C1 7479 ETHIOPIC, // 12C2..12C5 7480 UNKNOWN, // 12C6..12C7 7481 ETHIOPIC, // 12C8..12D6 7482 UNKNOWN, // 12D7 7483 ETHIOPIC, // 12D8..1310 7484 
UNKNOWN, // 1311 7485 ETHIOPIC, // 1312..1315 7486 UNKNOWN, // 1316..1317 7487 ETHIOPIC, // 1318..135A 7488 UNKNOWN, // 135B..135C 7489 ETHIOPIC, // 135D..137C 7490 UNKNOWN, // 137D..137F 7491 ETHIOPIC, // 1380..1399 7492 UNKNOWN, // 139A..139F 7493 CHEROKEE, // 13A0..13F5 7494 UNKNOWN, // 13F6..13F7 7495 CHEROKEE, // 13F8..13FD 7496 UNKNOWN, // 13FE..13FF 7497 CANADIAN_ABORIGINAL, // 1400..167F 7498 OGHAM, // 1680..169C 7499 UNKNOWN, // 169D..169F 7500 RUNIC, // 16A0..16EA 7501 COMMON, // 16EB..16ED 7502 RUNIC, // 16EE..16F8 7503 UNKNOWN, // 16F9..16FF 7504 TAGALOG, // 1700..1715 7505 UNKNOWN, // 1716..171E 7506 TAGALOG, // 171F 7507 HANUNOO, // 1720..1734 7508 COMMON, // 1735..1736 7509 UNKNOWN, // 1737..173F 7510 BUHID, // 1740..1753 7511 UNKNOWN, // 1754..175F 7512 TAGBANWA, // 1760..176C 7513 UNKNOWN, // 176D 7514 TAGBANWA, // 176E..1770 7515 UNKNOWN, // 1771 7516 TAGBANWA, // 1772..1773 7517 UNKNOWN, // 1774..177F 7518 KHMER, // 1780..17DD 7519 UNKNOWN, // 17DE..17DF 7520 KHMER, // 17E0..17E9 7521 UNKNOWN, // 17EA..17EF 7522 KHMER, // 17F0..17F9 7523 UNKNOWN, // 17FA..17FF 7524 MONGOLIAN, // 1800..1801 7525 COMMON, // 1802..1803 7526 MONGOLIAN, // 1804 7527 COMMON, // 1805 7528 MONGOLIAN, // 1806..1819 7529 UNKNOWN, // 181A..181F 7530 MONGOLIAN, // 1820..1878 7531 UNKNOWN, // 1879..187F 7532 MONGOLIAN, // 1880..18AA 7533 UNKNOWN, // 18AB..18AF 7534 CANADIAN_ABORIGINAL, // 18B0..18F5 7535 UNKNOWN, // 18F6..18FF 7536 LIMBU, // 1900..191E 7537 UNKNOWN, // 191F 7538 LIMBU, // 1920..192B 7539 UNKNOWN, // 192C..192F 7540 LIMBU, // 1930..193B 7541 UNKNOWN, // 193C..193F 7542 LIMBU, // 1940 7543 UNKNOWN, // 1941..1943 7544 LIMBU, // 1944..194F 7545 TAI_LE, // 1950..196D 7546 UNKNOWN, // 196E..196F 7547 TAI_LE, // 1970..1974 7548 UNKNOWN, // 1975..197F 7549 NEW_TAI_LUE, // 1980..19AB 7550 UNKNOWN, // 19AC..19AF 7551 NEW_TAI_LUE, // 19B0..19C9 7552 UNKNOWN, // 19CA..19CF 7553 NEW_TAI_LUE, // 19D0..19DA 7554 UNKNOWN, // 19DB..19DD 7555 NEW_TAI_LUE, // 19DE..19DF 7556 
KHMER, // 19E0..19FF 7557 BUGINESE, // 1A00..1A1B 7558 UNKNOWN, // 1A1C..1A1D 7559 BUGINESE, // 1A1E..1A1F 7560 TAI_THAM, // 1A20..1A5E 7561 UNKNOWN, // 1A5F 7562 TAI_THAM, // 1A60..1A7C 7563 UNKNOWN, // 1A7D..1A7E 7564 TAI_THAM, // 1A7F..1A89 7565 UNKNOWN, // 1A8A..1A8F 7566 TAI_THAM, // 1A90..1A99 7567 UNKNOWN, // 1A9A..1A9F 7568 TAI_THAM, // 1AA0..1AAD 7569 UNKNOWN, // 1AAE..1AAF 7570 INHERITED, // 1AB0..1ACE 7571 UNKNOWN, // 1ACF..1AFF 7572 BALINESE, // 1B00..1B4C 7573 UNKNOWN, // 1B4D..1B4F 7574 BALINESE, // 1B50..1B7E 7575 UNKNOWN, // 1B7F 7576 SUNDANESE, // 1B80..1BBF 7577 BATAK, // 1BC0..1BF3 7578 UNKNOWN, // 1BF4..1BFB 7579 BATAK, // 1BFC..1BFF 7580 LEPCHA, // 1C00..1C37 7581 UNKNOWN, // 1C38..1C3A 7582 LEPCHA, // 1C3B..1C49 7583 UNKNOWN, // 1C4A..1C4C 7584 LEPCHA, // 1C4D..1C4F 7585 OL_CHIKI, // 1C50..1C7F 7586 CYRILLIC, // 1C80..1C88 7587 UNKNOWN, // 1C89..1C8F 7588 GEORGIAN, // 1C90..1CBA 7589 UNKNOWN, // 1CBB..1CBC 7590 GEORGIAN, // 1CBD..1CBF 7591 SUNDANESE, // 1CC0..1CC7 7592 UNKNOWN, // 1CC8..1CCF 7593 INHERITED, // 1CD0..1CD2 7594 COMMON, // 1CD3 7595 INHERITED, // 1CD4..1CE0 7596 COMMON, // 1CE1 7597 INHERITED, // 1CE2..1CE8 7598 COMMON, // 1CE9..1CEC 7599 INHERITED, // 1CED 7600 COMMON, // 1CEE..1CF3 7601 INHERITED, // 1CF4 7602 COMMON, // 1CF5..1CF7 7603 INHERITED, // 1CF8..1CF9 7604 COMMON, // 1CFA 7605 UNKNOWN, // 1CFB..1CFF 7606 LATIN, // 1D00..1D25 7607 GREEK, // 1D26..1D2A 7608 CYRILLIC, // 1D2B 7609 LATIN, // 1D2C..1D5C 7610 GREEK, // 1D5D..1D61 7611 LATIN, // 1D62..1D65 7612 GREEK, // 1D66..1D6A 7613 LATIN, // 1D6B..1D77 7614 CYRILLIC, // 1D78 7615 LATIN, // 1D79..1DBE 7616 GREEK, // 1DBF 7617 INHERITED, // 1DC0..1DFF 7618 LATIN, // 1E00..1EFF 7619 GREEK, // 1F00..1F15 7620 UNKNOWN, // 1F16..1F17 7621 GREEK, // 1F18..1F1D 7622 UNKNOWN, // 1F1E..1F1F 7623 GREEK, // 1F20..1F45 7624 UNKNOWN, // 1F46..1F47 7625 GREEK, // 1F48..1F4D 7626 UNKNOWN, // 1F4E..1F4F 7627 GREEK, // 1F50..1F57 7628 UNKNOWN, // 1F58 7629 GREEK, // 1F59 7630 UNKNOWN, // 
1F5A 7631 GREEK, // 1F5B 7632 UNKNOWN, // 1F5C 7633 GREEK, // 1F5D 7634 UNKNOWN, // 1F5E 7635 GREEK, // 1F5F..1F7D 7636 UNKNOWN, // 1F7E..1F7F 7637 GREEK, // 1F80..1FB4 7638 UNKNOWN, // 1FB5 7639 GREEK, // 1FB6..1FC4 7640 UNKNOWN, // 1FC5 7641 GREEK, // 1FC6..1FD3 7642 UNKNOWN, // 1FD4..1FD5 7643 GREEK, // 1FD6..1FDB 7644 UNKNOWN, // 1FDC 7645 GREEK, // 1FDD..1FEF 7646 UNKNOWN, // 1FF0..1FF1 7647 GREEK, // 1FF2..1FF4 7648 UNKNOWN, // 1FF5 7649 GREEK, // 1FF6..1FFE 7650 UNKNOWN, // 1FFF 7651 COMMON, // 2000..200B 7652 INHERITED, // 200C..200D 7653 COMMON, // 200E..2064 7654 UNKNOWN, // 2065 7655 COMMON, // 2066..2070 7656 LATIN, // 2071 7657 UNKNOWN, // 2072..2073 7658 COMMON, // 2074..207E 7659 LATIN, // 207F 7660 COMMON, // 2080..208E 7661 UNKNOWN, // 208F 7662 LATIN, // 2090..209C 7663 UNKNOWN, // 209D..209F 7664 COMMON, // 20A0..20C0 7665 UNKNOWN, // 20C1..20CF 7666 INHERITED, // 20D0..20F0 7667 UNKNOWN, // 20F1..20FF 7668 COMMON, // 2100..2125 7669 GREEK, // 2126 7670 COMMON, // 2127..2129 7671 LATIN, // 212A..212B 7672 COMMON, // 212C..2131 7673 LATIN, // 2132 7674 COMMON, // 2133..214D 7675 LATIN, // 214E 7676 COMMON, // 214F..215F 7677 LATIN, // 2160..2188 7678 COMMON, // 2189..218B 7679 UNKNOWN, // 218C..218F 7680 COMMON, // 2190..2426 7681 UNKNOWN, // 2427..243F 7682 COMMON, // 2440..244A 7683 UNKNOWN, // 244B..245F 7684 COMMON, // 2460..27FF 7685 BRAILLE, // 2800..28FF 7686 COMMON, // 2900..2B73 7687 UNKNOWN, // 2B74..2B75 7688 COMMON, // 2B76..2B95 7689 UNKNOWN, // 2B96 7690 COMMON, // 2B97..2BFF 7691 GLAGOLITIC, // 2C00..2C5F 7692 LATIN, // 2C60..2C7F 7693 COPTIC, // 2C80..2CF3 7694 UNKNOWN, // 2CF4..2CF8 7695 COPTIC, // 2CF9..2CFF 7696 GEORGIAN, // 2D00..2D25 7697 UNKNOWN, // 2D26 7698 GEORGIAN, // 2D27 7699 UNKNOWN, // 2D28..2D2C 7700 GEORGIAN, // 2D2D 7701 UNKNOWN, // 2D2E..2D2F 7702 TIFINAGH, // 2D30..2D67 7703 UNKNOWN, // 2D68..2D6E 7704 TIFINAGH, // 2D6F..2D70 7705 UNKNOWN, // 2D71..2D7E 7706 TIFINAGH, // 2D7F 7707 ETHIOPIC, // 2D80..2D96 7708 
UNKNOWN, // 2D97..2D9F 7709 ETHIOPIC, // 2DA0..2DA6 7710 UNKNOWN, // 2DA7 7711 ETHIOPIC, // 2DA8..2DAE 7712 UNKNOWN, // 2DAF 7713 ETHIOPIC, // 2DB0..2DB6 7714 UNKNOWN, // 2DB7 7715 ETHIOPIC, // 2DB8..2DBE 7716 UNKNOWN, // 2DBF 7717 ETHIOPIC, // 2DC0..2DC6 7718 UNKNOWN, // 2DC7 7719 ETHIOPIC, // 2DC8..2DCE 7720 UNKNOWN, // 2DCF 7721 ETHIOPIC, // 2DD0..2DD6 7722 UNKNOWN, // 2DD7 7723 ETHIOPIC, // 2DD8..2DDE 7724 UNKNOWN, // 2DDF 7725 CYRILLIC, // 2DE0..2DFF 7726 COMMON, // 2E00..2E5D 7727 UNKNOWN, // 2E5E..2E7F 7728 HAN, // 2E80..2E99 7729 UNKNOWN, // 2E9A 7730 HAN, // 2E9B..2EF3 7731 UNKNOWN, // 2EF4..2EFF 7732 HAN, // 2F00..2FD5 7733 UNKNOWN, // 2FD6..2FEF 7734 COMMON, // 2FF0..3004 7735 HAN, // 3005 7736 COMMON, // 3006 7737 HAN, // 3007 7738 COMMON, // 3008..3020 7739 HAN, // 3021..3029 7740 INHERITED, // 302A..302D 7741 HANGUL, // 302E..302F 7742 COMMON, // 3030..3037 7743 HAN, // 3038..303B 7744 COMMON, // 303C..303F 7745 UNKNOWN, // 3040 7746 HIRAGANA, // 3041..3096 7747 UNKNOWN, // 3097..3098 7748 INHERITED, // 3099..309A 7749 COMMON, // 309B..309C 7750 HIRAGANA, // 309D..309F 7751 COMMON, // 30A0 7752 KATAKANA, // 30A1..30FA 7753 COMMON, // 30FB..30FC 7754 KATAKANA, // 30FD..30FF 7755 UNKNOWN, // 3100..3104 7756 BOPOMOFO, // 3105..312F 7757 UNKNOWN, // 3130 7758 HANGUL, // 3131..318E 7759 UNKNOWN, // 318F 7760 COMMON, // 3190..319F 7761 BOPOMOFO, // 31A0..31BF 7762 COMMON, // 31C0..31E3 7763 UNKNOWN, // 31E4..31EE 7764 COMMON, // 31EF 7765 KATAKANA, // 31F0..31FF 7766 HANGUL, // 3200..321E 7767 UNKNOWN, // 321F 7768 COMMON, // 3220..325F 7769 HANGUL, // 3260..327E 7770 COMMON, // 327F..32CF 7771 KATAKANA, // 32D0..32FE 7772 COMMON, // 32FF 7773 KATAKANA, // 3300..3357 7774 COMMON, // 3358..33FF 7775 HAN, // 3400..4DBF 7776 COMMON, // 4DC0..4DFF 7777 HAN, // 4E00..9FFF 7778 YI, // A000..A48C 7779 UNKNOWN, // A48D..A48F 7780 YI, // A490..A4C6 7781 UNKNOWN, // A4C7..A4CF 7782 LISU, // A4D0..A4FF 7783 VAI, // A500..A62B 7784 UNKNOWN, // A62C..A63F 7785 CYRILLIC, 
// A640..A69F 7786 BAMUM, // A6A0..A6F7 7787 UNKNOWN, // A6F8..A6FF 7788 COMMON, // A700..A721 7789 LATIN, // A722..A787 7790 COMMON, // A788..A78A 7791 LATIN, // A78B..A7CA 7792 UNKNOWN, // A7CB..A7CF 7793 LATIN, // A7D0..A7D1 7794 UNKNOWN, // A7D2 7795 LATIN, // A7D3 7796 UNKNOWN, // A7D4 7797 LATIN, // A7D5..A7D9 7798 UNKNOWN, // A7DA..A7F1 7799 LATIN, // A7F2..A7FF 7800 SYLOTI_NAGRI, // A800..A82C 7801 UNKNOWN, // A82D..A82F 7802 COMMON, // A830..A839 7803 UNKNOWN, // A83A..A83F 7804 PHAGS_PA, // A840..A877 7805 UNKNOWN, // A878..A87F 7806 SAURASHTRA, // A880..A8C5 7807 UNKNOWN, // A8C6..A8CD 7808 SAURASHTRA, // A8CE..A8D9 7809 UNKNOWN, // A8DA..A8DF 7810 DEVANAGARI, // A8E0..A8FF 7811 KAYAH_LI, // A900..A92D 7812 COMMON, // A92E 7813 KAYAH_LI, // A92F 7814 REJANG, // A930..A953 7815 UNKNOWN, // A954..A95E 7816 REJANG, // A95F 7817 HANGUL, // A960..A97C 7818 UNKNOWN, // A97D..A97F 7819 JAVANESE, // A980..A9CD 7820 UNKNOWN, // A9CE 7821 COMMON, // A9CF 7822 JAVANESE, // A9D0..A9D9 7823 UNKNOWN, // A9DA..A9DD 7824 JAVANESE, // A9DE..A9DF 7825 MYANMAR, // A9E0..A9FE 7826 UNKNOWN, // A9FF 7827 CHAM, // AA00..AA36 7828 UNKNOWN, // AA37..AA3F 7829 CHAM, // AA40..AA4D 7830 UNKNOWN, // AA4E..AA4F 7831 CHAM, // AA50..AA59 7832 UNKNOWN, // AA5A..AA5B 7833 CHAM, // AA5C..AA5F 7834 MYANMAR, // AA60..AA7F 7835 TAI_VIET, // AA80..AAC2 7836 UNKNOWN, // AAC3..AADA 7837 TAI_VIET, // AADB..AADF 7838 MEETEI_MAYEK, // AAE0..AAF6 7839 UNKNOWN, // AAF7..AB00 7840 ETHIOPIC, // AB01..AB06 7841 UNKNOWN, // AB07..AB08 7842 ETHIOPIC, // AB09..AB0E 7843 UNKNOWN, // AB0F..AB10 7844 ETHIOPIC, // AB11..AB16 7845 UNKNOWN, // AB17..AB1F 7846 ETHIOPIC, // AB20..AB26 7847 UNKNOWN, // AB27 7848 ETHIOPIC, // AB28..AB2E 7849 UNKNOWN, // AB2F 7850 LATIN, // AB30..AB5A 7851 COMMON, // AB5B 7852 LATIN, // AB5C..AB64 7853 GREEK, // AB65 7854 LATIN, // AB66..AB69 7855 COMMON, // AB6A..AB6B 7856 UNKNOWN, // AB6C..AB6F 7857 CHEROKEE, // AB70..ABBF 7858 MEETEI_MAYEK, // ABC0..ABED 7859 UNKNOWN, // 
ABEE..ABEF 7860 MEETEI_MAYEK, // ABF0..ABF9 7861 UNKNOWN, // ABFA..ABFF 7862 HANGUL, // AC00..D7A3 7863 UNKNOWN, // D7A4..D7AF 7864 HANGUL, // D7B0..D7C6 7865 UNKNOWN, // D7C7..D7CA 7866 HANGUL, // D7CB..D7FB 7867 UNKNOWN, // D7FC..F8FF 7868 HAN, // F900..FA6D 7869 UNKNOWN, // FA6E..FA6F 7870 HAN, // FA70..FAD9 7871 UNKNOWN, // FADA..FAFF 7872 LATIN, // FB00..FB06 7873 UNKNOWN, // FB07..FB12 7874 ARMENIAN, // FB13..FB17 7875 UNKNOWN, // FB18..FB1C 7876 HEBREW, // FB1D..FB36 7877 UNKNOWN, // FB37 7878 HEBREW, // FB38..FB3C 7879 UNKNOWN, // FB3D 7880 HEBREW, // FB3E 7881 UNKNOWN, // FB3F 7882 HEBREW, // FB40..FB41 7883 UNKNOWN, // FB42 7884 HEBREW, // FB43..FB44 7885 UNKNOWN, // FB45 7886 HEBREW, // FB46..FB4F 7887 ARABIC, // FB50..FBC2 7888 UNKNOWN, // FBC3..FBD2 7889 ARABIC, // FBD3..FD3D 7890 COMMON, // FD3E..FD3F 7891 ARABIC, // FD40..FD8F 7892 UNKNOWN, // FD90..FD91 7893 ARABIC, // FD92..FDC7 7894 UNKNOWN, // FDC8..FDCE 7895 ARABIC, // FDCF 7896 UNKNOWN, // FDD0..FDEF 7897 ARABIC, // FDF0..FDFF 7898 INHERITED, // FE00..FE0F 7899 COMMON, // FE10..FE19 7900 UNKNOWN, // FE1A..FE1F 7901 INHERITED, // FE20..FE2D 7902 CYRILLIC, // FE2E..FE2F 7903 COMMON, // FE30..FE52 7904 UNKNOWN, // FE53 7905 COMMON, // FE54..FE66 7906 UNKNOWN, // FE67 7907 COMMON, // FE68..FE6B 7908 UNKNOWN, // FE6C..FE6F 7909 ARABIC, // FE70..FE74 7910 UNKNOWN, // FE75 7911 ARABIC, // FE76..FEFC 7912 UNKNOWN, // FEFD..FEFE 7913 COMMON, // FEFF 7914 UNKNOWN, // FF00 7915 COMMON, // FF01..FF20 7916 LATIN, // FF21..FF3A 7917 COMMON, // FF3B..FF40 7918 LATIN, // FF41..FF5A 7919 COMMON, // FF5B..FF65 7920 KATAKANA, // FF66..FF6F 7921 COMMON, // FF70 7922 KATAKANA, // FF71..FF9D 7923 COMMON, // FF9E..FF9F 7924 HANGUL, // FFA0..FFBE 7925 UNKNOWN, // FFBF..FFC1 7926 HANGUL, // FFC2..FFC7 7927 UNKNOWN, // FFC8..FFC9 7928 HANGUL, // FFCA..FFCF 7929 UNKNOWN, // FFD0..FFD1 7930 HANGUL, // FFD2..FFD7 7931 UNKNOWN, // FFD8..FFD9 7932 HANGUL, // FFDA..FFDC 7933 UNKNOWN, // FFDD..FFDF 7934 COMMON, // FFE0..FFE6 
7935 UNKNOWN, // FFE7 7936 COMMON, // FFE8..FFEE 7937 UNKNOWN, // FFEF..FFF8 7938 COMMON, // FFF9..FFFD 7939 UNKNOWN, // FFFE..FFFF 7940 LINEAR_B, // 10000..1000B 7941 UNKNOWN, // 1000C 7942 LINEAR_B, // 1000D..10026 7943 UNKNOWN, // 10027 7944 LINEAR_B, // 10028..1003A 7945 UNKNOWN, // 1003B 7946 LINEAR_B, // 1003C..1003D 7947 UNKNOWN, // 1003E 7948 LINEAR_B, // 1003F..1004D 7949 UNKNOWN, // 1004E..1004F 7950 LINEAR_B, // 10050..1005D 7951 UNKNOWN, // 1005E..1007F 7952 LINEAR_B, // 10080..100FA 7953 UNKNOWN, // 100FB..100FF 7954 COMMON, // 10100..10102 7955 UNKNOWN, // 10103..10106 7956 COMMON, // 10107..10133 7957 UNKNOWN, // 10134..10136 7958 COMMON, // 10137..1013F 7959 GREEK, // 10140..1018E 7960 UNKNOWN, // 1018F 7961 COMMON, // 10190..1019C 7962 UNKNOWN, // 1019D..1019F 7963 GREEK, // 101A0 7964 UNKNOWN, // 101A1..101CF 7965 COMMON, // 101D0..101FC 7966 INHERITED, // 101FD 7967 UNKNOWN, // 101FE..1027F 7968 LYCIAN, // 10280..1029C 7969 UNKNOWN, // 1029D..1029F 7970 CARIAN, // 102A0..102D0 7971 UNKNOWN, // 102D1..102DF 7972 INHERITED, // 102E0 7973 COMMON, // 102E1..102FB 7974 UNKNOWN, // 102FC..102FF 7975 OLD_ITALIC, // 10300..10323 7976 UNKNOWN, // 10324..1032C 7977 OLD_ITALIC, // 1032D..1032F 7978 GOTHIC, // 10330..1034A 7979 UNKNOWN, // 1034B..1034F 7980 OLD_PERMIC, // 10350..1037A 7981 UNKNOWN, // 1037B..1037F 7982 UGARITIC, // 10380..1039D 7983 UNKNOWN, // 1039E 7984 UGARITIC, // 1039F 7985 OLD_PERSIAN, // 103A0..103C3 7986 UNKNOWN, // 103C4..103C7 7987 OLD_PERSIAN, // 103C8..103D5 7988 UNKNOWN, // 103D6..103FF 7989 DESERET, // 10400..1044F 7990 SHAVIAN, // 10450..1047F 7991 OSMANYA, // 10480..1049D 7992 UNKNOWN, // 1049E..1049F 7993 OSMANYA, // 104A0..104A9 7994 UNKNOWN, // 104AA..104AF 7995 OSAGE, // 104B0..104D3 7996 UNKNOWN, // 104D4..104D7 7997 OSAGE, // 104D8..104FB 7998 UNKNOWN, // 104FC..104FF 7999 ELBASAN, // 10500..10527 8000 UNKNOWN, // 10528..1052F 8001 CAUCASIAN_ALBANIAN, // 10530..10563 8002 UNKNOWN, // 10564..1056E 8003 
CAUCASIAN_ALBANIAN, // 1056F 8004 VITHKUQI, // 10570..1057A 8005 UNKNOWN, // 1057B 8006 VITHKUQI, // 1057C..1058A 8007 UNKNOWN, // 1058B 8008 VITHKUQI, // 1058C..10592 8009 UNKNOWN, // 10593 8010 VITHKUQI, // 10594..10595 8011 UNKNOWN, // 10596 8012 VITHKUQI, // 10597..105A1 8013 UNKNOWN, // 105A2 8014 VITHKUQI, // 105A3..105B1 8015 UNKNOWN, // 105B2 8016 VITHKUQI, // 105B3..105B9 8017 UNKNOWN, // 105BA 8018 VITHKUQI, // 105BB..105BC 8019 UNKNOWN, // 105BD..105FF 8020 LINEAR_A, // 10600..10736 8021 UNKNOWN, // 10737..1073F 8022 LINEAR_A, // 10740..10755 8023 UNKNOWN, // 10756..1075F 8024 LINEAR_A, // 10760..10767 8025 UNKNOWN, // 10768..1077F 8026 LATIN, // 10780..10785 8027 UNKNOWN, // 10786 8028 LATIN, // 10787..107B0 8029 UNKNOWN, // 107B1 8030 LATIN, // 107B2..107BA 8031 UNKNOWN, // 107BB..107FF 8032 CYPRIOT, // 10800..10805 8033 UNKNOWN, // 10806..10807 8034 CYPRIOT, // 10808 8035 UNKNOWN, // 10809 8036 CYPRIOT, // 1080A..10835 8037 UNKNOWN, // 10836 8038 CYPRIOT, // 10837..10838 8039 UNKNOWN, // 10839..1083B 8040 CYPRIOT, // 1083C 8041 UNKNOWN, // 1083D..1083E 8042 CYPRIOT, // 1083F 8043 IMPERIAL_ARAMAIC, // 10840..10855 8044 UNKNOWN, // 10856 8045 IMPERIAL_ARAMAIC, // 10857..1085F 8046 PALMYRENE, // 10860..1087F 8047 NABATAEAN, // 10880..1089E 8048 UNKNOWN, // 1089F..108A6 8049 NABATAEAN, // 108A7..108AF 8050 UNKNOWN, // 108B0..108DF 8051 HATRAN, // 108E0..108F2 8052 UNKNOWN, // 108F3 8053 HATRAN, // 108F4..108F5 8054 UNKNOWN, // 108F6..108FA 8055 HATRAN, // 108FB..108FF 8056 PHOENICIAN, // 10900..1091B 8057 UNKNOWN, // 1091C..1091E 8058 PHOENICIAN, // 1091F 8059 LYDIAN, // 10920..10939 8060 UNKNOWN, // 1093A..1093E 8061 LYDIAN, // 1093F 8062 UNKNOWN, // 10940..1097F 8063 MEROITIC_HIEROGLYPHS, // 10980..1099F 8064 MEROITIC_CURSIVE, // 109A0..109B7 8065 UNKNOWN, // 109B8..109BB 8066 MEROITIC_CURSIVE, // 109BC..109CF 8067 UNKNOWN, // 109D0..109D1 8068 MEROITIC_CURSIVE, // 109D2..109FF 8069 KHAROSHTHI, // 10A00..10A03 8070 UNKNOWN, // 10A04 8071 KHAROSHTHI, // 
10A05..10A06 8072 UNKNOWN, // 10A07..10A0B 8073 KHAROSHTHI, // 10A0C..10A13 8074 UNKNOWN, // 10A14 8075 KHAROSHTHI, // 10A15..10A17 8076 UNKNOWN, // 10A18 8077 KHAROSHTHI, // 10A19..10A35 8078 UNKNOWN, // 10A36..10A37 8079 KHAROSHTHI, // 10A38..10A3A 8080 UNKNOWN, // 10A3B..10A3E 8081 KHAROSHTHI, // 10A3F..10A48 8082 UNKNOWN, // 10A49..10A4F 8083 KHAROSHTHI, // 10A50..10A58 8084 UNKNOWN, // 10A59..10A5F 8085 OLD_SOUTH_ARABIAN, // 10A60..10A7F 8086 OLD_NORTH_ARABIAN, // 10A80..10A9F 8087 UNKNOWN, // 10AA0..10ABF 8088 MANICHAEAN, // 10AC0..10AE6 8089 UNKNOWN, // 10AE7..10AEA 8090 MANICHAEAN, // 10AEB..10AF6 8091 UNKNOWN, // 10AF7..10AFF 8092 AVESTAN, // 10B00..10B35 8093 UNKNOWN, // 10B36..10B38 8094 AVESTAN, // 10B39..10B3F 8095 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55 8096 UNKNOWN, // 10B56..10B57 8097 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F 8098 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72 8099 UNKNOWN, // 10B73..10B77 8100 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F 8101 PSALTER_PAHLAVI, // 10B80..10B91 8102 UNKNOWN, // 10B92..10B98 8103 PSALTER_PAHLAVI, // 10B99..10B9C 8104 UNKNOWN, // 10B9D..10BA8 8105 PSALTER_PAHLAVI, // 10BA9..10BAF 8106 UNKNOWN, // 10BB0..10BFF 8107 OLD_TURKIC, // 10C00..10C48 8108 UNKNOWN, // 10C49..10C7F 8109 OLD_HUNGARIAN, // 10C80..10CB2 8110 UNKNOWN, // 10CB3..10CBF 8111 OLD_HUNGARIAN, // 10CC0..10CF2 8112 UNKNOWN, // 10CF3..10CF9 8113 OLD_HUNGARIAN, // 10CFA..10CFF 8114 HANIFI_ROHINGYA, // 10D00..10D27 8115 UNKNOWN, // 10D28..10D2F 8116 HANIFI_ROHINGYA, // 10D30..10D39 8117 UNKNOWN, // 10D3A..10E5F 8118 ARABIC, // 10E60..10E7E 8119 UNKNOWN, // 10E7F 8120 YEZIDI, // 10E80..10EA9 8121 UNKNOWN, // 10EAA 8122 YEZIDI, // 10EAB..10EAD 8123 UNKNOWN, // 10EAE..10EAF 8124 YEZIDI, // 10EB0..10EB1 8125 UNKNOWN, // 10EB2..10EFC 8126 ARABIC, // 10EFD..10EFF 8127 OLD_SOGDIAN, // 10F00..10F27 8128 UNKNOWN, // 10F28..10F2F 8129 SOGDIAN, // 10F30..10F59 8130 UNKNOWN, // 10F5A..10F6F 8131 OLD_UYGHUR, // 10F70..10F89 8132 UNKNOWN, // 10F8A..10FAF 8133 CHORASMIAN, // 
10FB0..10FCB 8134 UNKNOWN, // 10FCC..10FDF 8135 ELYMAIC, // 10FE0..10FF6 8136 UNKNOWN, // 10FF7..10FFF 8137 BRAHMI, // 11000..1104D 8138 UNKNOWN, // 1104E..11051 8139 BRAHMI, // 11052..11075 8140 UNKNOWN, // 11076..1107E 8141 BRAHMI, // 1107F 8142 KAITHI, // 11080..110C2 8143 UNKNOWN, // 110C3..110CC 8144 KAITHI, // 110CD 8145 UNKNOWN, // 110CE..110CF 8146 SORA_SOMPENG, // 110D0..110E8 8147 UNKNOWN, // 110E9..110EF 8148 SORA_SOMPENG, // 110F0..110F9 8149 UNKNOWN, // 110FA..110FF 8150 CHAKMA, // 11100..11134 8151 UNKNOWN, // 11135 8152 CHAKMA, // 11136..11147 8153 UNKNOWN, // 11148..1114F 8154 MAHAJANI, // 11150..11176 8155 UNKNOWN, // 11177..1117F 8156 SHARADA, // 11180..111DF 8157 UNKNOWN, // 111E0 8158 SINHALA, // 111E1..111F4 8159 UNKNOWN, // 111F5..111FF 8160 KHOJKI, // 11200..11211 8161 UNKNOWN, // 11212 8162 KHOJKI, // 11213..11241 8163 UNKNOWN, // 11242..1127F 8164 MULTANI, // 11280..11286 8165 UNKNOWN, // 11287 8166 MULTANI, // 11288 8167 UNKNOWN, // 11289 8168 MULTANI, // 1128A..1128D 8169 UNKNOWN, // 1128E 8170 MULTANI, // 1128F..1129D 8171 UNKNOWN, // 1129E 8172 MULTANI, // 1129F..112A9 8173 UNKNOWN, // 112AA..112AF 8174 KHUDAWADI, // 112B0..112EA 8175 UNKNOWN, // 112EB..112EF 8176 KHUDAWADI, // 112F0..112F9 8177 UNKNOWN, // 112FA..112FF 8178 GRANTHA, // 11300..11303 8179 UNKNOWN, // 11304 8180 GRANTHA, // 11305..1130C 8181 UNKNOWN, // 1130D..1130E 8182 GRANTHA, // 1130F..11310 8183 UNKNOWN, // 11311..11312 8184 GRANTHA, // 11313..11328 8185 UNKNOWN, // 11329 8186 GRANTHA, // 1132A..11330 8187 UNKNOWN, // 11331 8188 GRANTHA, // 11332..11333 8189 UNKNOWN, // 11334 8190 GRANTHA, // 11335..11339 8191 UNKNOWN, // 1133A 8192 INHERITED, // 1133B 8193 GRANTHA, // 1133C..11344 8194 UNKNOWN, // 11345..11346 8195 GRANTHA, // 11347..11348 8196 UNKNOWN, // 11349..1134A 8197 GRANTHA, // 1134B..1134D 8198 UNKNOWN, // 1134E..1134F 8199 GRANTHA, // 11350 8200 UNKNOWN, // 11351..11356 8201 GRANTHA, // 11357 8202 UNKNOWN, // 11358..1135C 8203 GRANTHA, // 1135D..11363 8204 
UNKNOWN, // 11364..11365 8205 GRANTHA, // 11366..1136C 8206 UNKNOWN, // 1136D..1136F 8207 GRANTHA, // 11370..11374 8208 UNKNOWN, // 11375..113FF 8209 NEWA, // 11400..1145B 8210 UNKNOWN, // 1145C 8211 NEWA, // 1145D..11461 8212 UNKNOWN, // 11462..1147F 8213 TIRHUTA, // 11480..114C7 8214 UNKNOWN, // 114C8..114CF 8215 TIRHUTA, // 114D0..114D9 8216 UNKNOWN, // 114DA..1157F 8217 SIDDHAM, // 11580..115B5 8218 UNKNOWN, // 115B6..115B7 8219 SIDDHAM, // 115B8..115DD 8220 UNKNOWN, // 115DE..115FF 8221 MODI, // 11600..11644 8222 UNKNOWN, // 11645..1164F 8223 MODI, // 11650..11659 8224 UNKNOWN, // 1165A..1165F 8225 MONGOLIAN, // 11660..1166C 8226 UNKNOWN, // 1166D..1167F 8227 TAKRI, // 11680..116B9 8228 UNKNOWN, // 116BA..116BF 8229 TAKRI, // 116C0..116C9 8230 UNKNOWN, // 116CA..116FF 8231 AHOM, // 11700..1171A 8232 UNKNOWN, // 1171B..1171C 8233 AHOM, // 1171D..1172B 8234 UNKNOWN, // 1172C..1172F 8235 AHOM, // 11730..11746 8236 UNKNOWN, // 11747..117FF 8237 DOGRA, // 11800..1183B 8238 UNKNOWN, // 1183C..1189F 8239 WARANG_CITI, // 118A0..118F2 8240 UNKNOWN, // 118F3..118FE 8241 WARANG_CITI, // 118FF 8242 DIVES_AKURU, // 11900..11906 8243 UNKNOWN, // 11907..11908 8244 DIVES_AKURU, // 11909 8245 UNKNOWN, // 1190A..1190B 8246 DIVES_AKURU, // 1190C..11913 8247 UNKNOWN, // 11914 8248 DIVES_AKURU, // 11915..11916 8249 UNKNOWN, // 11917 8250 DIVES_AKURU, // 11918..11935 8251 UNKNOWN, // 11936 8252 DIVES_AKURU, // 11937..11938 8253 UNKNOWN, // 11939..1193A 8254 DIVES_AKURU, // 1193B..11946 8255 UNKNOWN, // 11947..1194F 8256 DIVES_AKURU, // 11950..11959 8257 UNKNOWN, // 1195A..1199F 8258 NANDINAGARI, // 119A0..119A7 8259 UNKNOWN, // 119A8..119A9 8260 NANDINAGARI, // 119AA..119D7 8261 UNKNOWN, // 119D8..119D9 8262 NANDINAGARI, // 119DA..119E4 8263 UNKNOWN, // 119E5..119FF 8264 ZANABAZAR_SQUARE, // 11A00..11A47 8265 UNKNOWN, // 11A48..11A4F 8266 SOYOMBO, // 11A50..11AA2 8267 UNKNOWN, // 11AA3..11AAF 8268 CANADIAN_ABORIGINAL, // 11AB0..11ABF 8269 PAU_CIN_HAU, // 11AC0..11AF8 8270 UNKNOWN, 
// 11AF9..11AFF 8271 DEVANAGARI, // 11B00..11B09 8272 UNKNOWN, // 11B0A..11BFF 8273 BHAIKSUKI, // 11C00..11C08 8274 UNKNOWN, // 11C09 8275 BHAIKSUKI, // 11C0A..11C36 8276 UNKNOWN, // 11C37 8277 BHAIKSUKI, // 11C38..11C45 8278 UNKNOWN, // 11C46..11C4F 8279 BHAIKSUKI, // 11C50..11C6C 8280 UNKNOWN, // 11C6D..11C6F 8281 MARCHEN, // 11C70..11C8F 8282 UNKNOWN, // 11C90..11C91 8283 MARCHEN, // 11C92..11CA7 8284 UNKNOWN, // 11CA8 8285 MARCHEN, // 11CA9..11CB6 8286 UNKNOWN, // 11CB7..11CFF 8287 MASARAM_GONDI, // 11D00..11D06 8288 UNKNOWN, // 11D07 8289 MASARAM_GONDI, // 11D08..11D09 8290 UNKNOWN, // 11D0A 8291 MASARAM_GONDI, // 11D0B..11D36 8292 UNKNOWN, // 11D37..11D39 8293 MASARAM_GONDI, // 11D3A 8294 UNKNOWN, // 11D3B 8295 MASARAM_GONDI, // 11D3C..11D3D 8296 UNKNOWN, // 11D3E 8297 MASARAM_GONDI, // 11D3F..11D47 8298 UNKNOWN, // 11D48..11D4F 8299 MASARAM_GONDI, // 11D50..11D59 8300 UNKNOWN, // 11D5A..11D5F 8301 GUNJALA_GONDI, // 11D60..11D65 8302 UNKNOWN, // 11D66 8303 GUNJALA_GONDI, // 11D67..11D68 8304 UNKNOWN, // 11D69 8305 GUNJALA_GONDI, // 11D6A..11D8E 8306 UNKNOWN, // 11D8F 8307 GUNJALA_GONDI, // 11D90..11D91 8308 UNKNOWN, // 11D92 8309 GUNJALA_GONDI, // 11D93..11D98 8310 UNKNOWN, // 11D99..11D9F 8311 GUNJALA_GONDI, // 11DA0..11DA9 8312 UNKNOWN, // 11DAA..11EDF 8313 MAKASAR, // 11EE0..11EF8 8314 UNKNOWN, // 11EF9..11EFF 8315 KAWI, // 11F00..11F10 8316 UNKNOWN, // 11F11 8317 KAWI, // 11F12..11F3A 8318 UNKNOWN, // 11F3B..11F3D 8319 KAWI, // 11F3E..11F59 8320 UNKNOWN, // 11F5A..11FAF 8321 LISU, // 11FB0 8322 UNKNOWN, // 11FB1..11FBF 8323 TAMIL, // 11FC0..11FF1 8324 UNKNOWN, // 11FF2..11FFE 8325 TAMIL, // 11FFF 8326 CUNEIFORM, // 12000..12399 8327 UNKNOWN, // 1239A..123FF 8328 CUNEIFORM, // 12400..1246E 8329 UNKNOWN, // 1246F 8330 CUNEIFORM, // 12470..12474 8331 UNKNOWN, // 12475..1247F 8332 CUNEIFORM, // 12480..12543 8333 UNKNOWN, // 12544..12F8F 8334 CYPRO_MINOAN, // 12F90..12FF2 8335 UNKNOWN, // 12FF3..12FFF 8336 EGYPTIAN_HIEROGLYPHS, // 13000..13455 8337 UNKNOWN, // 
13456..143FF 8338 ANATOLIAN_HIEROGLYPHS, // 14400..14646 8339 UNKNOWN, // 14647..167FF 8340 BAMUM, // 16800..16A38 8341 UNKNOWN, // 16A39..16A3F 8342 MRO, // 16A40..16A5E 8343 UNKNOWN, // 16A5F 8344 MRO, // 16A60..16A69 8345 UNKNOWN, // 16A6A..16A6D 8346 MRO, // 16A6E..16A6F 8347 TANGSA, // 16A70..16ABE 8348 UNKNOWN, // 16ABF 8349 TANGSA, // 16AC0..16AC9 8350 UNKNOWN, // 16ACA..16ACF 8351 BASSA_VAH, // 16AD0..16AED 8352 UNKNOWN, // 16AEE..16AEF 8353 BASSA_VAH, // 16AF0..16AF5 8354 UNKNOWN, // 16AF6..16AFF 8355 PAHAWH_HMONG, // 16B00..16B45 8356 UNKNOWN, // 16B46..16B4F 8357 PAHAWH_HMONG, // 16B50..16B59 8358 UNKNOWN, // 16B5A 8359 PAHAWH_HMONG, // 16B5B..16B61 8360 UNKNOWN, // 16B62 8361 PAHAWH_HMONG, // 16B63..16B77 8362 UNKNOWN, // 16B78..16B7C 8363 PAHAWH_HMONG, // 16B7D..16B8F 8364 UNKNOWN, // 16B90..16E3F 8365 MEDEFAIDRIN, // 16E40..16E9A 8366 UNKNOWN, // 16E9B..16EFF 8367 MIAO, // 16F00..16F4A 8368 UNKNOWN, // 16F4B..16F4E 8369 MIAO, // 16F4F..16F87 8370 UNKNOWN, // 16F88..16F8E 8371 MIAO, // 16F8F..16F9F 8372 UNKNOWN, // 16FA0..16FDF 8373 TANGUT, // 16FE0 8374 NUSHU, // 16FE1 8375 HAN, // 16FE2..16FE3 8376 KHITAN_SMALL_SCRIPT, // 16FE4 8377 UNKNOWN, // 16FE5..16FEF 8378 HAN, // 16FF0..16FF1 8379 UNKNOWN, // 16FF2..16FFF 8380 TANGUT, // 17000..187F7 8381 UNKNOWN, // 187F8..187FF 8382 TANGUT, // 18800..18AFF 8383 KHITAN_SMALL_SCRIPT, // 18B00..18CD5 8384 UNKNOWN, // 18CD6..18CFF 8385 TANGUT, // 18D00..18D08 8386 UNKNOWN, // 18D09..1AFEF 8387 KATAKANA, // 1AFF0..1AFF3 8388 UNKNOWN, // 1AFF4 8389 KATAKANA, // 1AFF5..1AFFB 8390 UNKNOWN, // 1AFFC 8391 KATAKANA, // 1AFFD..1AFFE 8392 UNKNOWN, // 1AFFF 8393 KATAKANA, // 1B000 8394 HIRAGANA, // 1B001..1B11F 8395 KATAKANA, // 1B120..1B122 8396 UNKNOWN, // 1B123..1B131 8397 HIRAGANA, // 1B132 8398 UNKNOWN, // 1B133..1B14F 8399 HIRAGANA, // 1B150..1B152 8400 UNKNOWN, // 1B153..1B154 8401 KATAKANA, // 1B155 8402 UNKNOWN, // 1B156..1B163 8403 KATAKANA, // 1B164..1B167 8404 UNKNOWN, // 1B168..1B16F 8405 NUSHU, // 
1B170..1B2FB 8406 UNKNOWN, // 1B2FC..1BBFF 8407 DUPLOYAN, // 1BC00..1BC6A 8408 UNKNOWN, // 1BC6B..1BC6F 8409 DUPLOYAN, // 1BC70..1BC7C 8410 UNKNOWN, // 1BC7D..1BC7F 8411 DUPLOYAN, // 1BC80..1BC88 8412 UNKNOWN, // 1BC89..1BC8F 8413 DUPLOYAN, // 1BC90..1BC99 8414 UNKNOWN, // 1BC9A..1BC9B 8415 DUPLOYAN, // 1BC9C..1BC9F 8416 COMMON, // 1BCA0..1BCA3 8417 UNKNOWN, // 1BCA4..1CEFF 8418 INHERITED, // 1CF00..1CF2D 8419 UNKNOWN, // 1CF2E..1CF2F 8420 INHERITED, // 1CF30..1CF46 8421 UNKNOWN, // 1CF47..1CF4F 8422 COMMON, // 1CF50..1CFC3 8423 UNKNOWN, // 1CFC4..1CFFF 8424 COMMON, // 1D000..1D0F5 8425 UNKNOWN, // 1D0F6..1D0FF 8426 COMMON, // 1D100..1D126 8427 UNKNOWN, // 1D127..1D128 8428 COMMON, // 1D129..1D166 8429 INHERITED, // 1D167..1D169 8430 COMMON, // 1D16A..1D17A 8431 INHERITED, // 1D17B..1D182 8432 COMMON, // 1D183..1D184 8433 INHERITED, // 1D185..1D18B 8434 COMMON, // 1D18C..1D1A9 8435 INHERITED, // 1D1AA..1D1AD 8436 COMMON, // 1D1AE..1D1EA 8437 UNKNOWN, // 1D1EB..1D1FF 8438 GREEK, // 1D200..1D245 8439 UNKNOWN, // 1D246..1D2BF 8440 COMMON, // 1D2C0..1D2D3 8441 UNKNOWN, // 1D2D4..1D2DF 8442 COMMON, // 1D2E0..1D2F3 8443 UNKNOWN, // 1D2F4..1D2FF 8444 COMMON, // 1D300..1D356 8445 UNKNOWN, // 1D357..1D35F 8446 COMMON, // 1D360..1D378 8447 UNKNOWN, // 1D379..1D3FF 8448 COMMON, // 1D400..1D454 8449 UNKNOWN, // 1D455 8450 COMMON, // 1D456..1D49C 8451 UNKNOWN, // 1D49D 8452 COMMON, // 1D49E..1D49F 8453 UNKNOWN, // 1D4A0..1D4A1 8454 COMMON, // 1D4A2 8455 UNKNOWN, // 1D4A3..1D4A4 8456 COMMON, // 1D4A5..1D4A6 8457 UNKNOWN, // 1D4A7..1D4A8 8458 COMMON, // 1D4A9..1D4AC 8459 UNKNOWN, // 1D4AD 8460 COMMON, // 1D4AE..1D4B9 8461 UNKNOWN, // 1D4BA 8462 COMMON, // 1D4BB 8463 UNKNOWN, // 1D4BC 8464 COMMON, // 1D4BD..1D4C3 8465 UNKNOWN, // 1D4C4 8466 COMMON, // 1D4C5..1D505 8467 UNKNOWN, // 1D506 8468 COMMON, // 1D507..1D50A 8469 UNKNOWN, // 1D50B..1D50C 8470 COMMON, // 1D50D..1D514 8471 UNKNOWN, // 1D515 8472 COMMON, // 1D516..1D51C 8473 UNKNOWN, // 1D51D 8474 COMMON, // 1D51E..1D539 8475 
UNKNOWN, // 1D53A 8476 COMMON, // 1D53B..1D53E 8477 UNKNOWN, // 1D53F 8478 COMMON, // 1D540..1D544 8479 UNKNOWN, // 1D545 8480 COMMON, // 1D546 8481 UNKNOWN, // 1D547..1D549 8482 COMMON, // 1D54A..1D550 8483 UNKNOWN, // 1D551 8484 COMMON, // 1D552..1D6A5 8485 UNKNOWN, // 1D6A6..1D6A7 8486 COMMON, // 1D6A8..1D7CB 8487 UNKNOWN, // 1D7CC..1D7CD 8488 COMMON, // 1D7CE..1D7FF 8489 SIGNWRITING, // 1D800..1DA8B 8490 UNKNOWN, // 1DA8C..1DA9A 8491 SIGNWRITING, // 1DA9B..1DA9F 8492 UNKNOWN, // 1DAA0 8493 SIGNWRITING, // 1DAA1..1DAAF 8494 UNKNOWN, // 1DAB0..1DEFF 8495 LATIN, // 1DF00..1DF1E 8496 UNKNOWN, // 1DF1F..1DF24 8497 LATIN, // 1DF25..1DF2A 8498 UNKNOWN, // 1DF2B..1DFFF 8499 GLAGOLITIC, // 1E000..1E006 8500 UNKNOWN, // 1E007 8501 GLAGOLITIC, // 1E008..1E018 8502 UNKNOWN, // 1E019..1E01A 8503 GLAGOLITIC, // 1E01B..1E021 8504 UNKNOWN, // 1E022 8505 GLAGOLITIC, // 1E023..1E024 8506 UNKNOWN, // 1E025 8507 GLAGOLITIC, // 1E026..1E02A 8508 UNKNOWN, // 1E02B..1E02F 8509 CYRILLIC, // 1E030..1E06D 8510 UNKNOWN, // 1E06E..1E08E 8511 CYRILLIC, // 1E08F 8512 UNKNOWN, // 1E090..1E0FF 8513 NYIAKENG_PUACHUE_HMONG, // 1E100..1E12C 8514 UNKNOWN, // 1E12D..1E12F 8515 NYIAKENG_PUACHUE_HMONG, // 1E130..1E13D 8516 UNKNOWN, // 1E13E..1E13F 8517 NYIAKENG_PUACHUE_HMONG, // 1E140..1E149 8518 UNKNOWN, // 1E14A..1E14D 8519 NYIAKENG_PUACHUE_HMONG, // 1E14E..1E14F 8520 UNKNOWN, // 1E150..1E28F 8521 TOTO, // 1E290..1E2AE 8522 UNKNOWN, // 1E2AF..1E2BF 8523 WANCHO, // 1E2C0..1E2F9 8524 UNKNOWN, // 1E2FA..1E2FE 8525 WANCHO, // 1E2FF 8526 UNKNOWN, // 1E300..1E4CF 8527 NAG_MUNDARI, // 1E4D0..1E4F9 8528 UNKNOWN, // 1E4FA..1E7DF 8529 ETHIOPIC, // 1E7E0..1E7E6 8530 UNKNOWN, // 1E7E7 8531 ETHIOPIC, // 1E7E8..1E7EB 8532 UNKNOWN, // 1E7EC 8533 ETHIOPIC, // 1E7ED..1E7EE 8534 UNKNOWN, // 1E7EF 8535 ETHIOPIC, // 1E7F0..1E7FE 8536 UNKNOWN, // 1E7FF 8537 MENDE_KIKAKUI, // 1E800..1E8C4 8538 UNKNOWN, // 1E8C5..1E8C6 8539 MENDE_KIKAKUI, // 1E8C7..1E8D6 8540 UNKNOWN, // 1E8D7..1E8FF 8541 ADLAM, // 1E900..1E94B 8542 
UNKNOWN, // 1E94C..1E94F 8543 ADLAM, // 1E950..1E959 8544 UNKNOWN, // 1E95A..1E95D 8545 ADLAM, // 1E95E..1E95F 8546 UNKNOWN, // 1E960..1EC70 8547 COMMON, // 1EC71..1ECB4 8548 UNKNOWN, // 1ECB5..1ED00 8549 COMMON, // 1ED01..1ED3D 8550 UNKNOWN, // 1ED3E..1EDFF 8551 ARABIC, // 1EE00..1EE03 8552 UNKNOWN, // 1EE04 8553 ARABIC, // 1EE05..1EE1F 8554 UNKNOWN, // 1EE20 8555 ARABIC, // 1EE21..1EE22 8556 UNKNOWN, // 1EE23 8557 ARABIC, // 1EE24 8558 UNKNOWN, // 1EE25..1EE26 8559 ARABIC, // 1EE27 8560 UNKNOWN, // 1EE28 8561 ARABIC, // 1EE29..1EE32 8562 UNKNOWN, // 1EE33 8563 ARABIC, // 1EE34..1EE37 8564 UNKNOWN, // 1EE38 8565 ARABIC, // 1EE39 8566 UNKNOWN, // 1EE3A 8567 ARABIC, // 1EE3B 8568 UNKNOWN, // 1EE3C..1EE41 8569 ARABIC, // 1EE42 8570 UNKNOWN, // 1EE43..1EE46 8571 ARABIC, // 1EE47 8572 UNKNOWN, // 1EE48 8573 ARABIC, // 1EE49 8574 UNKNOWN, // 1EE4A 8575 ARABIC, // 1EE4B 8576 UNKNOWN, // 1EE4C 8577 ARABIC, // 1EE4D..1EE4F 8578 UNKNOWN, // 1EE50 8579 ARABIC, // 1EE51..1EE52 8580 UNKNOWN, // 1EE53 8581 ARABIC, // 1EE54 8582 UNKNOWN, // 1EE55..1EE56 8583 ARABIC, // 1EE57 8584 UNKNOWN, // 1EE58 8585 ARABIC, // 1EE59 8586 UNKNOWN, // 1EE5A 8587 ARABIC, // 1EE5B 8588 UNKNOWN, // 1EE5C 8589 ARABIC, // 1EE5D 8590 UNKNOWN, // 1EE5E 8591 ARABIC, // 1EE5F 8592 UNKNOWN, // 1EE60 8593 ARABIC, // 1EE61..1EE62 8594 UNKNOWN, // 1EE63 8595 ARABIC, // 1EE64 8596 UNKNOWN, // 1EE65..1EE66 8597 ARABIC, // 1EE67..1EE6A 8598 UNKNOWN, // 1EE6B 8599 ARABIC, // 1EE6C..1EE72 8600 UNKNOWN, // 1EE73 8601 ARABIC, // 1EE74..1EE77 8602 UNKNOWN, // 1EE78 8603 ARABIC, // 1EE79..1EE7C 8604 UNKNOWN, // 1EE7D 8605 ARABIC, // 1EE7E 8606 UNKNOWN, // 1EE7F 8607 ARABIC, // 1EE80..1EE89 8608 UNKNOWN, // 1EE8A 8609 ARABIC, // 1EE8B..1EE9B 8610 UNKNOWN, // 1EE9C..1EEA0 8611 ARABIC, // 1EEA1..1EEA3 8612 UNKNOWN, // 1EEA4 8613 ARABIC, // 1EEA5..1EEA9 8614 UNKNOWN, // 1EEAA 8615 ARABIC, // 1EEAB..1EEBB 8616 UNKNOWN, // 1EEBC..1EEEF 8617 ARABIC, // 1EEF0..1EEF1 8618 UNKNOWN, // 1EEF2..1EFFF 8619 COMMON, // 1F000..1F02B 
8620 UNKNOWN, // 1F02C..1F02F 8621 COMMON, // 1F030..1F093 8622 UNKNOWN, // 1F094..1F09F 8623 COMMON, // 1F0A0..1F0AE 8624 UNKNOWN, // 1F0AF..1F0B0 8625 COMMON, // 1F0B1..1F0BF 8626 UNKNOWN, // 1F0C0 8627 COMMON, // 1F0C1..1F0CF 8628 UNKNOWN, // 1F0D0 8629 COMMON, // 1F0D1..1F0F5 8630 UNKNOWN, // 1F0F6..1F0FF 8631 COMMON, // 1F100..1F1AD 8632 UNKNOWN, // 1F1AE..1F1E5 8633 COMMON, // 1F1E6..1F1FF 8634 HIRAGANA, // 1F200 8635 COMMON, // 1F201..1F202 8636 UNKNOWN, // 1F203..1F20F 8637 COMMON, // 1F210..1F23B 8638 UNKNOWN, // 1F23C..1F23F 8639 COMMON, // 1F240..1F248 8640 UNKNOWN, // 1F249..1F24F 8641 COMMON, // 1F250..1F251 8642 UNKNOWN, // 1F252..1F25F 8643 COMMON, // 1F260..1F265 8644 UNKNOWN, // 1F266..1F2FF 8645 COMMON, // 1F300..1F6D7 8646 UNKNOWN, // 1F6D8..1F6DB 8647 COMMON, // 1F6DC..1F6EC 8648 UNKNOWN, // 1F6ED..1F6EF 8649 COMMON, // 1F6F0..1F6FC 8650 UNKNOWN, // 1F6FD..1F6FF 8651 COMMON, // 1F700..1F776 8652 UNKNOWN, // 1F777..1F77A 8653 COMMON, // 1F77B..1F7D9 8654 UNKNOWN, // 1F7DA..1F7DF 8655 COMMON, // 1F7E0..1F7EB 8656 UNKNOWN, // 1F7EC..1F7EF 8657 COMMON, // 1F7F0 8658 UNKNOWN, // 1F7F1..1F7FF 8659 COMMON, // 1F800..1F80B 8660 UNKNOWN, // 1F80C..1F80F 8661 COMMON, // 1F810..1F847 8662 UNKNOWN, // 1F848..1F84F 8663 COMMON, // 1F850..1F859 8664 UNKNOWN, // 1F85A..1F85F 8665 COMMON, // 1F860..1F887 8666 UNKNOWN, // 1F888..1F88F 8667 COMMON, // 1F890..1F8AD 8668 UNKNOWN, // 1F8AE..1F8AF 8669 COMMON, // 1F8B0..1F8B1 8670 UNKNOWN, // 1F8B2..1F8FF 8671 COMMON, // 1F900..1FA53 8672 UNKNOWN, // 1FA54..1FA5F 8673 COMMON, // 1FA60..1FA6D 8674 UNKNOWN, // 1FA6E..1FA6F 8675 COMMON, // 1FA70..1FA7C 8676 UNKNOWN, // 1FA7D..1FA7F 8677 COMMON, // 1FA80..1FA88 8678 UNKNOWN, // 1FA89..1FA8F 8679 COMMON, // 1FA90..1FABD 8680 UNKNOWN, // 1FABE 8681 COMMON, // 1FABF..1FAC5 8682 UNKNOWN, // 1FAC6..1FACD 8683 COMMON, // 1FACE..1FADB 8684 UNKNOWN, // 1FADC..1FADF 8685 COMMON, // 1FAE0..1FAE8 8686 UNKNOWN, // 1FAE9..1FAEF 8687 COMMON, // 1FAF0..1FAF8 8688 UNKNOWN, // 
1FAF9..1FAFF 8689 COMMON, // 1FB00..1FB92 8690 UNKNOWN, // 1FB93 8691 COMMON, // 1FB94..1FBCA 8692 UNKNOWN, // 1FBCB..1FBEF 8693 COMMON, // 1FBF0..1FBF9 8694 UNKNOWN, // 1FBFA..1FFFF 8695 HAN, // 20000..2A6DF 8696 UNKNOWN, // 2A6E0..2A6FF 8697 HAN, // 2A700..2B739 8698 UNKNOWN, // 2B73A..2B73F 8699 HAN, // 2B740..2B81D 8700 UNKNOWN, // 2B81E..2B81F 8701 HAN, // 2B820..2CEA1 8702 UNKNOWN, // 2CEA2..2CEAF 8703 HAN, // 2CEB0..2EBE0 8704 UNKNOWN, // 2EBE1..2EBEF 8705 HAN, // 2EBF0..2EE5D 8706 UNKNOWN, // 2EE5E..2F7FF 8707 HAN, // 2F800..2FA1D 8708 UNKNOWN, // 2FA1E..2FFFF 8709 HAN, // 30000..3134A 8710 UNKNOWN, // 3134B..3134F 8711 HAN, // 31350..323AF 8712 UNKNOWN, // 323B0..E0000 8713 COMMON, // E0001 8714 UNKNOWN, // E0002..E001F 8715 COMMON, // E0020..E007F 8716 UNKNOWN, // E0080..E00FF 8717 INHERITED, // E0100..E01EF 8718 UNKNOWN, // E01F0..10FFFF 8719 }; 8720 8721 private static final HashMap<String, Character.UnicodeScript> aliases; 8722 static { 8723 aliases = HashMap.newHashMap(UNKNOWN.ordinal() + 1); 8724 aliases.put("ADLM", ADLAM); 8725 aliases.put("AGHB", CAUCASIAN_ALBANIAN); 8726 aliases.put("AHOM", AHOM); 8727 aliases.put("ARAB", ARABIC); 8728 aliases.put("ARMI", IMPERIAL_ARAMAIC); 8729 aliases.put("ARMN", ARMENIAN); 8730 aliases.put("AVST", AVESTAN); 8731 aliases.put("BALI", BALINESE); 8732 aliases.put("BAMU", BAMUM); 8733 aliases.put("BASS", BASSA_VAH); 8734 aliases.put("BATK", BATAK); 8735 aliases.put("BENG", BENGALI); 8736 aliases.put("BHKS", BHAIKSUKI); 8737 aliases.put("BOPO", BOPOMOFO); 8738 aliases.put("BRAH", BRAHMI); 8739 aliases.put("BRAI", BRAILLE); 8740 aliases.put("BUGI", BUGINESE); 8741 aliases.put("BUHD", BUHID); 8742 aliases.put("CAKM", CHAKMA); 8743 aliases.put("CANS", CANADIAN_ABORIGINAL); 8744 aliases.put("CARI", CARIAN); 8745 aliases.put("CHAM", CHAM); 8746 aliases.put("CHER", CHEROKEE); 8747 aliases.put("CHRS", CHORASMIAN); 8748 aliases.put("COPT", COPTIC); 8749 aliases.put("CPMN", CYPRO_MINOAN); 8750 aliases.put("CPRT", CYPRIOT); 
8751 aliases.put("CYRL", CYRILLIC); 8752 aliases.put("DEVA", DEVANAGARI); 8753 aliases.put("DIAK", DIVES_AKURU); 8754 aliases.put("DOGR", DOGRA); 8755 aliases.put("DSRT", DESERET); 8756 aliases.put("DUPL", DUPLOYAN); 8757 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 8758 aliases.put("ELBA", ELBASAN); 8759 aliases.put("ELYM", ELYMAIC); 8760 aliases.put("ETHI", ETHIOPIC); 8761 aliases.put("GEOR", GEORGIAN); 8762 aliases.put("GLAG", GLAGOLITIC); 8763 aliases.put("GONG", GUNJALA_GONDI); 8764 aliases.put("GONM", MASARAM_GONDI); 8765 aliases.put("GOTH", GOTHIC); 8766 aliases.put("GRAN", GRANTHA); 8767 aliases.put("GREK", GREEK); 8768 aliases.put("GUJR", GUJARATI); 8769 aliases.put("GURU", GURMUKHI); 8770 aliases.put("HANG", HANGUL); 8771 aliases.put("HANI", HAN); 8772 aliases.put("HANO", HANUNOO); 8773 aliases.put("HATR", HATRAN); 8774 aliases.put("HEBR", HEBREW); 8775 aliases.put("HIRA", HIRAGANA); 8776 aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS); 8777 aliases.put("HMNG", PAHAWH_HMONG); 8778 aliases.put("HMNP", NYIAKENG_PUACHUE_HMONG); 8779 aliases.put("HUNG", OLD_HUNGARIAN); 8780 aliases.put("ITAL", OLD_ITALIC); 8781 aliases.put("JAVA", JAVANESE); 8782 aliases.put("KALI", KAYAH_LI); 8783 aliases.put("KANA", KATAKANA); 8784 aliases.put("KAWI", KAWI); 8785 aliases.put("KHAR", KHAROSHTHI); 8786 aliases.put("KHMR", KHMER); 8787 aliases.put("KHOJ", KHOJKI); 8788 aliases.put("KITS", KHITAN_SMALL_SCRIPT); 8789 aliases.put("KNDA", KANNADA); 8790 aliases.put("KTHI", KAITHI); 8791 aliases.put("LANA", TAI_THAM); 8792 aliases.put("LAOO", LAO); 8793 aliases.put("LATN", LATIN); 8794 aliases.put("LEPC", LEPCHA); 8795 aliases.put("LIMB", LIMBU); 8796 aliases.put("LINA", LINEAR_A); 8797 aliases.put("LINB", LINEAR_B); 8798 aliases.put("LISU", LISU); 8799 aliases.put("LYCI", LYCIAN); 8800 aliases.put("LYDI", LYDIAN); 8801 aliases.put("MAHJ", MAHAJANI); 8802 aliases.put("MAKA", MAKASAR); 8803 aliases.put("MAND", MANDAIC); 8804 aliases.put("MANI", MANICHAEAN); 8805 aliases.put("MARC", 
MARCHEN); 8806 aliases.put("MEDF", MEDEFAIDRIN); 8807 aliases.put("MEND", MENDE_KIKAKUI); 8808 aliases.put("MERC", MEROITIC_CURSIVE); 8809 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 8810 aliases.put("MLYM", MALAYALAM); 8811 aliases.put("MODI", MODI); 8812 aliases.put("MONG", MONGOLIAN); 8813 aliases.put("MROO", MRO); 8814 aliases.put("MTEI", MEETEI_MAYEK); 8815 aliases.put("MULT", MULTANI); 8816 aliases.put("MYMR", MYANMAR); 8817 aliases.put("NAGM", NAG_MUNDARI); 8818 aliases.put("NAND", NANDINAGARI); 8819 aliases.put("NARB", OLD_NORTH_ARABIAN); 8820 aliases.put("NBAT", NABATAEAN); 8821 aliases.put("NEWA", NEWA); 8822 aliases.put("NKOO", NKO); 8823 aliases.put("NSHU", NUSHU); 8824 aliases.put("OGAM", OGHAM); 8825 aliases.put("OLCK", OL_CHIKI); 8826 aliases.put("ORKH", OLD_TURKIC); 8827 aliases.put("ORYA", ORIYA); 8828 aliases.put("OSGE", OSAGE); 8829 aliases.put("OSMA", OSMANYA); 8830 aliases.put("OUGR", OLD_UYGHUR); 8831 aliases.put("PALM", PALMYRENE); 8832 aliases.put("PAUC", PAU_CIN_HAU); 8833 aliases.put("PERM", OLD_PERMIC); 8834 aliases.put("PHAG", PHAGS_PA); 8835 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 8836 aliases.put("PHLP", PSALTER_PAHLAVI); 8837 aliases.put("PHNX", PHOENICIAN); 8838 aliases.put("PLRD", MIAO); 8839 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 8840 aliases.put("RJNG", REJANG); 8841 aliases.put("ROHG", HANIFI_ROHINGYA); 8842 aliases.put("RUNR", RUNIC); 8843 aliases.put("SAMR", SAMARITAN); 8844 aliases.put("SARB", OLD_SOUTH_ARABIAN); 8845 aliases.put("SAUR", SAURASHTRA); 8846 aliases.put("SGNW", SIGNWRITING); 8847 aliases.put("SHAW", SHAVIAN); 8848 aliases.put("SHRD", SHARADA); 8849 aliases.put("SIDD", SIDDHAM); 8850 aliases.put("SIND", KHUDAWADI); 8851 aliases.put("SINH", SINHALA); 8852 aliases.put("SOGD", SOGDIAN); 8853 aliases.put("SOGO", OLD_SOGDIAN); 8854 aliases.put("SORA", SORA_SOMPENG); 8855 aliases.put("SOYO", SOYOMBO); 8856 aliases.put("SUND", SUNDANESE); 8857 aliases.put("SYLO", SYLOTI_NAGRI); 8858 aliases.put("SYRC", SYRIAC); 
8859 aliases.put("TAGB", TAGBANWA); 8860 aliases.put("TAKR", TAKRI); 8861 aliases.put("TALE", TAI_LE); 8862 aliases.put("TALU", NEW_TAI_LUE); 8863 aliases.put("TAML", TAMIL); 8864 aliases.put("TANG", TANGUT); 8865 aliases.put("TAVT", TAI_VIET); 8866 aliases.put("TELU", TELUGU); 8867 aliases.put("TFNG", TIFINAGH); 8868 aliases.put("TGLG", TAGALOG); 8869 aliases.put("THAA", THAANA); 8870 aliases.put("THAI", THAI); 8871 aliases.put("TIBT", TIBETAN); 8872 aliases.put("TIRH", TIRHUTA); 8873 aliases.put("TNSA", TANGSA); 8874 aliases.put("TOTO", TOTO); 8875 aliases.put("UGAR", UGARITIC); 8876 aliases.put("VAII", VAI); 8877 aliases.put("VITH", VITHKUQI); 8878 aliases.put("WARA", WARANG_CITI); 8879 aliases.put("WCHO", WANCHO); 8880 aliases.put("XPEO", OLD_PERSIAN); 8881 aliases.put("XSUX", CUNEIFORM); 8882 aliases.put("YEZI", YEZIDI); 8883 aliases.put("YIII", YI); 8884 aliases.put("ZANB", ZANABAZAR_SQUARE); 8885 aliases.put("ZINH", INHERITED); 8886 aliases.put("ZYYY", COMMON); 8887 aliases.put("ZZZZ", UNKNOWN); 8888 } 8889 8890 /** 8891 * Returns the enum constant representing the Unicode script of which 8892 * the given character (Unicode code point) is assigned to. 8893 * 8894 * @param codePoint the character (Unicode code point) in question. 8895 * @return The {@code UnicodeScript} constant representing the 8896 * Unicode script of which this character is assigned to. 8897 * 8898 * @throws IllegalArgumentException if the specified 8899 * {@code codePoint} is an invalid Unicode code point. 
8900 * @see Character#isValidCodePoint(int) 8901 * 8902 */ 8903 public static UnicodeScript of(int codePoint) { 8904 if (!isValidCodePoint(codePoint)) 8905 throw new IllegalArgumentException( 8906 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 8907 int type = getType(codePoint); 8908 // leave SURROGATE and PRIVATE_USE for table lookup 8909 if (type == UNASSIGNED) 8910 return UNKNOWN; 8911 int index = Arrays.binarySearch(scriptStarts, codePoint); 8912 if (index < 0) 8913 index = -index - 2; 8914 return scripts[index]; 8915 } 8916 8917 /** 8918 * Returns the UnicodeScript constant with the given Unicode script 8919 * name or the script name alias. Script names and their aliases are 8920 * determined by The Unicode Standard. The files {@code Scripts.txt} 8921 * and {@code PropertyValueAliases.txt} define script names 8922 * and the script name aliases for a particular version of the 8923 * standard. The {@link Character} class specifies the version of 8924 * the standard that it supports. 8925 * <p> 8926 * Character case is ignored for all of the valid script names. 8927 * The en_US locale's case mapping rules are used to provide 8928 * case-insensitive string comparisons for script name validation. 8929 * 8930 * @param scriptName A {@code UnicodeScript} name. 8931 * @return The {@code UnicodeScript} constant identified 8932 * by {@code scriptName} 8933 * @throws IllegalArgumentException if {@code scriptName} is an 8934 * invalid name 8935 * @throws NullPointerException if {@code scriptName} is null 8936 */ 8937 public static final UnicodeScript forName(String scriptName) { 8938 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 8939 //.replace(' ', '_')); 8940 UnicodeScript sc = aliases.get(scriptName); 8941 if (sc != null) 8942 return sc; 8943 return valueOf(scriptName); 8944 } 8945 } 8946 8947 /** 8948 * The value of the {@code Character}. 
8949 * 8950 * @serial 8951 */ 8952 private final char value; 8953 8954 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 8955 @java.io.Serial 8956 private static final long serialVersionUID = 3786198910865385080L; 8957 8958 /** 8959 * Constructs a newly allocated {@code Character} object that 8960 * represents the specified {@code char} value. 8961 * 8962 * @param value the value to be represented by the 8963 * {@code Character} object. 8964 * 8965 * @deprecated 8966 * It is rarely appropriate to use this constructor. The static factory 8967 * {@link #valueOf(char)} is generally a better choice, as it is 8968 * likely to yield significantly better space and time performance. 8969 */ 8970 @Deprecated(since="9", forRemoval = true) 8971 public Character(char value) { 8972 this.value = value; 8973 } 8974 8975 private static final class CharacterCache { 8976 private CharacterCache(){} 8977 8978 @Stable 8979 static final Character[] cache; 8980 static Character[] archivedCache; 8981 8982 static { 8983 int size = 127 + 1; 8984 8985 // Load and use the archived cache if it exists 8986 CDS.initializeFromArchive(CharacterCache.class); 8987 if (archivedCache == null) { 8988 Character[] c = new Character[size]; 8989 for (int i = 0; i < size; i++) { 8990 c[i] = new Character((char) i); 8991 } 8992 archivedCache = c; 8993 } 8994 cache = archivedCache; 8995 assert cache.length == size; 8996 } 8997 } 8998 8999 /** 9000 * Returns a {@code Character} instance representing the specified 9001 * {@code char} value. 9002 * If a new {@code Character} instance is not required, this method 9003 * should generally be used in preference to the constructor 9004 * {@link #Character(char)}, as this method is likely to yield 9005 * significantly better space and time performance by caching 9006 * frequently requested values. 
9007 * 9008 * This method will always cache values in the range {@code 9009 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 9010 * cache other values outside of this range. 9011 * 9012 * @param c a char value. 9013 * @return a {@code Character} instance representing {@code c}. 9014 * @since 1.5 9015 */ 9016 @IntrinsicCandidate 9017 public static Character valueOf(char c) { 9018 if (c <= 127) { // must cache 9019 return CharacterCache.cache[(int)c]; 9020 } 9021 return new Character(c); 9022 } 9023 9024 /** 9025 * Returns the value of this {@code Character} object. 9026 * @return the primitive {@code char} value represented by 9027 * this object. 9028 */ 9029 @IntrinsicCandidate 9030 public char charValue() { 9031 return value; 9032 } 9033 9034 /** 9035 * Returns a hash code for this {@code Character}; equal to the result 9036 * of invoking {@code charValue()}. 9037 * 9038 * @return a hash code value for this {@code Character} 9039 */ 9040 @Override 9041 public int hashCode() { 9042 return Character.hashCode(value); 9043 } 9044 9045 /** 9046 * Returns a hash code for a {@code char} value; compatible with 9047 * {@code Character.hashCode()}. 9048 * 9049 * @since 1.8 9050 * 9051 * @param value The {@code char} for which to return a hash code. 9052 * @return a hash code value for a {@code char} value. 9053 */ 9054 public static int hashCode(char value) { 9055 return (int)value; 9056 } 9057 9058 /** 9059 * Compares this object against the specified object. 9060 * The result is {@code true} if and only if the argument is not 9061 * {@code null} and is a {@code Character} object that 9062 * represents the same {@code char} value as this object. 9063 * 9064 * @param obj the object to compare with. 9065 * @return {@code true} if the objects are the same; 9066 * {@code false} otherwise. 
9067 */ 9068 public boolean equals(Object obj) { 9069 if (obj instanceof Character c) { 9070 return value == c.charValue(); 9071 } 9072 return false; 9073 } 9074 9075 /** 9076 * Returns a {@code String} object representing this 9077 * {@code Character}'s value. The result is a string of 9078 * length 1 whose sole component is the primitive 9079 * {@code char} value represented by this 9080 * {@code Character} object. 9081 * 9082 * @return a string representation of this object. 9083 */ 9084 @Override 9085 public String toString() { 9086 return String.valueOf(value); 9087 } 9088 9089 /** 9090 * Returns a {@code String} object representing the 9091 * specified {@code char}. The result is a string of length 9092 * 1 consisting solely of the specified {@code char}. 9093 * 9094 * @apiNote This method cannot handle <a 9095 * href="#supplementary"> supplementary characters</a>. To support 9096 * all Unicode characters, including supplementary characters, use 9097 * the {@link #toString(int)} method. 9098 * 9099 * @param c the {@code char} to be converted 9100 * @return the string representation of the specified {@code char} 9101 * @since 1.4 9102 */ 9103 public static String toString(char c) { 9104 return String.valueOf(c); 9105 } 9106 9107 /** 9108 * Returns a {@code String} object representing the 9109 * specified character (Unicode code point). The result is a string of 9110 * length 1 or 2, consisting solely of the specified {@code codePoint}. 9111 * 9112 * @param codePoint the {@code codePoint} to be converted 9113 * @return the string representation of the specified {@code codePoint} 9114 * @throws IllegalArgumentException if the specified 9115 * {@code codePoint} is not a {@linkplain #isValidCodePoint 9116 * valid Unicode code point}. 
9117 * @since 11 9118 */ 9119 public static String toString(int codePoint) { 9120 return String.valueOfCodePoint(codePoint); 9121 } 9122 9123 /** 9124 * Determines whether the specified code point is a valid 9125 * <a href="http://www.unicode.org/glossary/#code_point"> 9126 * Unicode code point value</a>. 9127 * 9128 * @param codePoint the Unicode code point to be tested 9129 * @return {@code true} if the specified code point value is between 9130 * {@link #MIN_CODE_POINT} and 9131 * {@link #MAX_CODE_POINT} inclusive; 9132 * {@code false} otherwise. 9133 * @since 1.5 9134 */ 9135 public static boolean isValidCodePoint(int codePoint) { 9136 // Optimized form of: 9137 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 9138 int plane = codePoint >>> 16; 9139 return plane < ((MAX_CODE_POINT + 1) >>> 16); 9140 } 9141 9142 /** 9143 * Determines whether the specified character (Unicode code point) 9144 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 9145 * Such code points can be represented using a single {@code char}. 9146 * 9147 * @param codePoint the character (Unicode code point) to be tested 9148 * @return {@code true} if the specified code point is between 9149 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 9150 * {@code false} otherwise. 9151 * @since 1.7 9152 */ 9153 public static boolean isBmpCodePoint(int codePoint) { 9154 return codePoint >>> 16 == 0; 9155 // Optimized form of: 9156 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 9157 // We consistently use logical shift (>>>) to facilitate 9158 // additional runtime optimizations. 9159 } 9160 9161 /** 9162 * Determines whether the specified character (Unicode code point) 9163 * is in the <a href="#supplementary">supplementary character</a> range. 
9164 * 9165 * @param codePoint the character (Unicode code point) to be tested 9166 * @return {@code true} if the specified code point is between 9167 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 9168 * {@link #MAX_CODE_POINT} inclusive; 9169 * {@code false} otherwise. 9170 * @since 1.5 9171 */ 9172 public static boolean isSupplementaryCodePoint(int codePoint) { 9173 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 9174 && codePoint < MAX_CODE_POINT + 1; 9175 } 9176 9177 /** 9178 * Determines if the given {@code char} value is a 9179 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9180 * Unicode high-surrogate code unit</a> 9181 * (also known as <i>leading-surrogate code unit</i>). 9182 * 9183 * <p>Such values do not represent characters by themselves, 9184 * but are used in the representation of 9185 * <a href="#supplementary">supplementary characters</a> 9186 * in the UTF-16 encoding. 9187 * 9188 * @param ch the {@code char} value to be tested. 9189 * @return {@code true} if the {@code char} value is between 9190 * {@link #MIN_HIGH_SURROGATE} and 9191 * {@link #MAX_HIGH_SURROGATE} inclusive; 9192 * {@code false} otherwise. 9193 * @see Character#isLowSurrogate(char) 9194 * @see Character.UnicodeBlock#of(int) 9195 * @since 1.5 9196 */ 9197 public static boolean isHighSurrogate(char ch) { 9198 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 9199 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 9200 } 9201 9202 /** 9203 * Determines if the given {@code char} value is a 9204 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9205 * Unicode low-surrogate code unit</a> 9206 * (also known as <i>trailing-surrogate code unit</i>). 9207 * 9208 * <p>Such values do not represent characters by themselves, 9209 * but are used in the representation of 9210 * <a href="#supplementary">supplementary characters</a> 9211 * in the UTF-16 encoding. 
9212 * 9213 * @param ch the {@code char} value to be tested. 9214 * @return {@code true} if the {@code char} value is between 9215 * {@link #MIN_LOW_SURROGATE} and 9216 * {@link #MAX_LOW_SURROGATE} inclusive; 9217 * {@code false} otherwise. 9218 * @see Character#isHighSurrogate(char) 9219 * @since 1.5 9220 */ 9221 public static boolean isLowSurrogate(char ch) { 9222 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 9223 } 9224 9225 /** 9226 * Determines if the given {@code char} value is a Unicode 9227 * <i>surrogate code unit</i>. 9228 * 9229 * <p>Such values do not represent characters by themselves, 9230 * but are used in the representation of 9231 * <a href="#supplementary">supplementary characters</a> 9232 * in the UTF-16 encoding. 9233 * 9234 * <p>A char value is a surrogate code unit if and only if it is either 9235 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 9236 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 9237 * 9238 * @param ch the {@code char} value to be tested. 9239 * @return {@code true} if the {@code char} value is between 9240 * {@link #MIN_SURROGATE} and 9241 * {@link #MAX_SURROGATE} inclusive; 9242 * {@code false} otherwise. 9243 * @since 1.7 9244 */ 9245 public static boolean isSurrogate(char ch) { 9246 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 9247 } 9248 9249 /** 9250 * Determines whether the specified pair of {@code char} 9251 * values is a valid 9252 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9253 * Unicode surrogate pair</a>. 
9254 * 9255 * <p>This method is equivalent to the expression: 9256 * <blockquote><pre>{@code 9257 * isHighSurrogate(high) && isLowSurrogate(low) 9258 * }</pre></blockquote> 9259 * 9260 * @param high the high-surrogate code value to be tested 9261 * @param low the low-surrogate code value to be tested 9262 * @return {@code true} if the specified high and 9263 * low-surrogate code values represent a valid surrogate pair; 9264 * {@code false} otherwise. 9265 * @since 1.5 9266 */ 9267 public static boolean isSurrogatePair(char high, char low) { 9268 return isHighSurrogate(high) && isLowSurrogate(low); 9269 } 9270 9271 /** 9272 * Determines the number of {@code char} values needed to 9273 * represent the specified character (Unicode code point). If the 9274 * specified character is equal to or greater than 0x10000, then 9275 * the method returns 2. Otherwise, the method returns 1. 9276 * 9277 * <p>This method doesn't validate the specified character to be a 9278 * valid Unicode code point. The caller must validate the 9279 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 9280 * if necessary. 9281 * 9282 * @param codePoint the character (Unicode code point) to be tested. 9283 * @return 2 if the character is a valid supplementary character; 1 otherwise. 9284 * @see Character#isSupplementaryCodePoint(int) 9285 * @since 1.5 9286 */ 9287 public static int charCount(int codePoint) { 9288 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 9289 } 9290 9291 /** 9292 * Converts the specified surrogate pair to its supplementary code 9293 * point value. This method does not validate the specified 9294 * surrogate pair. The caller must validate it using {@link 9295 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 9296 * 9297 * @param high the high-surrogate code unit 9298 * @param low the low-surrogate code unit 9299 * @return the supplementary code point composed from the 9300 * specified surrogate pair. 
9301 * @since 1.5 9302 */ 9303 public static int toCodePoint(char high, char low) { 9304 // Optimized form of: 9305 // return ((high - MIN_HIGH_SURROGATE) << 10) 9306 // + (low - MIN_LOW_SURROGATE) 9307 // + MIN_SUPPLEMENTARY_CODE_POINT; 9308 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 9309 - (MIN_HIGH_SURROGATE << 10) 9310 - MIN_LOW_SURROGATE); 9311 } 9312 9313 /** 9314 * Returns the code point at the given index of the 9315 * {@code CharSequence}. If the {@code char} value at 9316 * the given index in the {@code CharSequence} is in the 9317 * high-surrogate range, the following index is less than the 9318 * length of the {@code CharSequence}, and the 9319 * {@code char} value at the following index is in the 9320 * low-surrogate range, then the supplementary code point 9321 * corresponding to this surrogate pair is returned. Otherwise, 9322 * the {@code char} value at the given index is returned. 9323 * 9324 * @param seq a sequence of {@code char} values (Unicode code 9325 * units) 9326 * @param index the index to the {@code char} values (Unicode 9327 * code units) in {@code seq} to be converted 9328 * @return the Unicode code point at the given index 9329 * @throws NullPointerException if {@code seq} is null. 9330 * @throws IndexOutOfBoundsException if the value 9331 * {@code index} is negative or not less than 9332 * {@link CharSequence#length() seq.length()}. 9333 * @since 1.5 9334 */ 9335 public static int codePointAt(CharSequence seq, int index) { 9336 char c1 = seq.charAt(index); 9337 if (isHighSurrogate(c1) && ++index < seq.length()) { 9338 char c2 = seq.charAt(index); 9339 if (isLowSurrogate(c2)) { 9340 return toCodePoint(c1, c2); 9341 } 9342 } 9343 return c1; 9344 } 9345 9346 /** 9347 * Returns the code point at the given index of the 9348 * {@code char} array. 
If the {@code char} value at 9349 * the given index in the {@code char} array is in the 9350 * high-surrogate range, the following index is less than the 9351 * length of the {@code char} array, and the 9352 * {@code char} value at the following index is in the 9353 * low-surrogate range, then the supplementary code point 9354 * corresponding to this surrogate pair is returned. Otherwise, 9355 * the {@code char} value at the given index is returned. 9356 * 9357 * @param a the {@code char} array 9358 * @param index the index to the {@code char} values (Unicode 9359 * code units) in the {@code char} array to be converted 9360 * @return the Unicode code point at the given index 9361 * @throws NullPointerException if {@code a} is null. 9362 * @throws IndexOutOfBoundsException if the value 9363 * {@code index} is negative or not less than 9364 * the length of the {@code char} array. 9365 * @since 1.5 9366 */ 9367 public static int codePointAt(char[] a, int index) { 9368 return codePointAtImpl(a, index, a.length); 9369 } 9370 9371 /** 9372 * Returns the code point at the given index of the 9373 * {@code char} array, where only array elements with 9374 * {@code index} less than {@code limit} can be used. If 9375 * the {@code char} value at the given index in the 9376 * {@code char} array is in the high-surrogate range, the 9377 * following index is less than the {@code limit}, and the 9378 * {@code char} value at the following index is in the 9379 * low-surrogate range, then the supplementary code point 9380 * corresponding to this surrogate pair is returned. Otherwise, 9381 * the {@code char} value at the given index is returned. 
9382 * 9383 * @param a the {@code char} array 9384 * @param index the index to the {@code char} values (Unicode 9385 * code units) in the {@code char} array to be converted 9386 * @param limit the index after the last array element that 9387 * can be used in the {@code char} array 9388 * @return the Unicode code point at the given index 9389 * @throws NullPointerException if {@code a} is null. 9390 * @throws IndexOutOfBoundsException if the {@code index} 9391 * argument is negative or not less than the {@code limit} 9392 * argument, or if the {@code limit} argument is negative or 9393 * greater than the length of the {@code char} array. 9394 * @since 1.5 9395 */ 9396 public static int codePointAt(char[] a, int index, int limit) { 9397 if (index >= limit || index < 0 || limit > a.length) { 9398 throw new IndexOutOfBoundsException(); 9399 } 9400 return codePointAtImpl(a, index, limit); 9401 } 9402 9403 // throws ArrayIndexOutOfBoundsException if index out of bounds 9404 static int codePointAtImpl(char[] a, int index, int limit) { 9405 char c1 = a[index]; 9406 if (isHighSurrogate(c1) && ++index < limit) { 9407 char c2 = a[index]; 9408 if (isLowSurrogate(c2)) { 9409 return toCodePoint(c1, c2); 9410 } 9411 } 9412 return c1; 9413 } 9414 9415 /** 9416 * Returns the code point preceding the given index of the 9417 * {@code CharSequence}. If the {@code char} value at 9418 * {@code (index - 1)} in the {@code CharSequence} is in 9419 * the low-surrogate range, {@code (index - 2)} is not 9420 * negative, and the {@code char} value at {@code (index - 2)} 9421 * in the {@code CharSequence} is in the 9422 * high-surrogate range, then the supplementary code point 9423 * corresponding to this surrogate pair is returned. Otherwise, 9424 * the {@code char} value at {@code (index - 1)} is 9425 * returned. 
9426 * 9427 * @param seq the {@code CharSequence} instance 9428 * @param index the index following the code point that should be returned 9429 * @return the Unicode code point value before the given index. 9430 * @throws NullPointerException if {@code seq} is null. 9431 * @throws IndexOutOfBoundsException if the {@code index} 9432 * argument is less than 1 or greater than {@link 9433 * CharSequence#length() seq.length()}. 9434 * @since 1.5 9435 */ 9436 public static int codePointBefore(CharSequence seq, int index) { 9437 char c2 = seq.charAt(--index); 9438 if (isLowSurrogate(c2) && index > 0) { 9439 char c1 = seq.charAt(--index); 9440 if (isHighSurrogate(c1)) { 9441 return toCodePoint(c1, c2); 9442 } 9443 } 9444 return c2; 9445 } 9446 9447 /** 9448 * Returns the code point preceding the given index of the 9449 * {@code char} array. If the {@code char} value at 9450 * {@code (index - 1)} in the {@code char} array is in 9451 * the low-surrogate range, {@code (index - 2)} is not 9452 * negative, and the {@code char} value at {@code (index - 2)} 9453 * in the {@code char} array is in the 9454 * high-surrogate range, then the supplementary code point 9455 * corresponding to this surrogate pair is returned. Otherwise, 9456 * the {@code char} value at {@code (index - 1)} is 9457 * returned. 9458 * 9459 * @param a the {@code char} array 9460 * @param index the index following the code point that should be returned 9461 * @return the Unicode code point value before the given index. 9462 * @throws NullPointerException if {@code a} is null. 
9463 * @throws IndexOutOfBoundsException if the {@code index} 9464 * argument is less than 1 or greater than the length of the 9465 * {@code char} array 9466 * @since 1.5 9467 */ 9468 public static int codePointBefore(char[] a, int index) { 9469 return codePointBeforeImpl(a, index, 0); 9470 } 9471 9472 /** 9473 * Returns the code point preceding the given index of the 9474 * {@code char} array, where only array elements with 9475 * {@code index} greater than or equal to {@code start} 9476 * can be used. If the {@code char} value at {@code (index - 1)} 9477 * in the {@code char} array is in the 9478 * low-surrogate range, {@code (index - 2)} is not less than 9479 * {@code start}, and the {@code char} value at 9480 * {@code (index - 2)} in the {@code char} array is in 9481 * the high-surrogate range, then the supplementary code point 9482 * corresponding to this surrogate pair is returned. Otherwise, 9483 * the {@code char} value at {@code (index - 1)} is 9484 * returned. 9485 * 9486 * @param a the {@code char} array 9487 * @param index the index following the code point that should be returned 9488 * @param start the index of the first array element in the 9489 * {@code char} array 9490 * @return the Unicode code point value before the given index. 9491 * @throws NullPointerException if {@code a} is null. 9492 * @throws IndexOutOfBoundsException if the {@code index} 9493 * argument is not greater than the {@code start} argument or 9494 * is greater than the length of the {@code char} array, or 9495 * if the {@code start} argument is negative or not less than 9496 * the length of the {@code char} array. 
9497 * @since 1.5 9498 */ 9499 public static int codePointBefore(char[] a, int index, int start) { 9500 if (index <= start || start < 0 || index > a.length) { 9501 throw new IndexOutOfBoundsException(); 9502 } 9503 return codePointBeforeImpl(a, index, start); 9504 } 9505 9506 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds 9507 static int codePointBeforeImpl(char[] a, int index, int start) { 9508 char c2 = a[--index]; 9509 if (isLowSurrogate(c2) && index > start) { 9510 char c1 = a[--index]; 9511 if (isHighSurrogate(c1)) { 9512 return toCodePoint(c1, c2); 9513 } 9514 } 9515 return c2; 9516 } 9517 9518 /** 9519 * Returns the leading surrogate (a 9520 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9521 * high surrogate code unit</a>) of the 9522 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9523 * surrogate pair</a> 9524 * representing the specified supplementary character (Unicode 9525 * code point) in the UTF-16 encoding. If the specified character 9526 * is not a 9527 * <a href="Character.html#supplementary">supplementary character</a>, 9528 * an unspecified {@code char} is returned. 9529 * 9530 * <p>If 9531 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9532 * is {@code true}, then 9533 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 9534 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 9535 * are also always {@code true}. 
9536 * 9537 * @param codePoint a supplementary character (Unicode code point) 9538 * @return the leading surrogate code unit used to represent the 9539 * character in the UTF-16 encoding 9540 * @since 1.7 9541 */ 9542 public static char highSurrogate(int codePoint) { 9543 return (char) ((codePoint >>> 10) 9544 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 9545 } 9546 9547 /** 9548 * Returns the trailing surrogate (a 9549 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9550 * low surrogate code unit</a>) of the 9551 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9552 * surrogate pair</a> 9553 * representing the specified supplementary character (Unicode 9554 * code point) in the UTF-16 encoding. If the specified character 9555 * is not a 9556 * <a href="Character.html#supplementary">supplementary character</a>, 9557 * an unspecified {@code char} is returned. 9558 * 9559 * <p>If 9560 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9561 * is {@code true}, then 9562 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 9563 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 9564 * are also always {@code true}. 9565 * 9566 * @param codePoint a supplementary character (Unicode code point) 9567 * @return the trailing surrogate code unit used to represent the 9568 * character in the UTF-16 encoding 9569 * @since 1.7 9570 */ 9571 public static char lowSurrogate(int codePoint) { 9572 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 9573 } 9574 9575 /** 9576 * Converts the specified character (Unicode code point) to its 9577 * UTF-16 representation. If the specified code point is a BMP 9578 * (Basic Multilingual Plane or Plane 0) value, the same value is 9579 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 9580 * specified code point is a supplementary character, its 9581 * surrogate values are stored in {@code dst[dstIndex]} 9582 * (high-surrogate) and {@code dst[dstIndex+1]} 9583 * (low-surrogate), and 2 is returned. 9584 * 9585 * @param codePoint the character (Unicode code point) to be converted. 9586 * @param dst an array of {@code char} in which the 9587 * {@code codePoint}'s UTF-16 value is stored. 9588 * @param dstIndex the start index into the {@code dst} 9589 * array where the converted value is stored. 9590 * @return 1 if the code point is a BMP code point, 2 if the 9591 * code point is a supplementary code point. 9592 * @throws IllegalArgumentException if the specified 9593 * {@code codePoint} is not a valid Unicode code point. 9594 * @throws NullPointerException if the specified {@code dst} is null. 9595 * @throws IndexOutOfBoundsException if {@code dstIndex} 9596 * is negative or not less than {@code dst.length}, or if 9597 * {@code dst} at {@code dstIndex} doesn't have enough 9598 * array element(s) to store the resulting {@code char} 9599 * value(s). (If {@code dstIndex} is equal to 9600 * {@code dst.length-1} and the specified 9601 * {@code codePoint} is a supplementary character, the 9602 * high-surrogate value is not stored in 9603 * {@code dst[dstIndex]}.) 9604 * @since 1.5 9605 */ 9606 public static int toChars(int codePoint, char[] dst, int dstIndex) { 9607 if (isBmpCodePoint(codePoint)) { 9608 dst[dstIndex] = (char) codePoint; 9609 return 1; 9610 } else if (isValidCodePoint(codePoint)) { 9611 toSurrogates(codePoint, dst, dstIndex); 9612 return 2; 9613 } else { 9614 throw new IllegalArgumentException( 9615 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9616 } 9617 } 9618 9619 /** 9620 * Converts the specified character (Unicode code point) to its 9621 * UTF-16 representation stored in a {@code char} array. 
If 9622 * the specified code point is a BMP (Basic Multilingual Plane or 9623 * Plane 0) value, the resulting {@code char} array has 9624 * the same value as {@code codePoint}. If the specified code 9625 * point is a supplementary code point, the resulting 9626 * {@code char} array has the corresponding surrogate pair. 9627 * 9628 * @param codePoint a Unicode code point 9629 * @return a {@code char} array having 9630 * {@code codePoint}'s UTF-16 representation. 9631 * @throws IllegalArgumentException if the specified 9632 * {@code codePoint} is not a valid Unicode code point. 9633 * @since 1.5 9634 */ 9635 public static char[] toChars(int codePoint) { 9636 if (isBmpCodePoint(codePoint)) { 9637 return new char[] { (char) codePoint }; 9638 } else if (isValidCodePoint(codePoint)) { 9639 char[] result = new char[2]; 9640 toSurrogates(codePoint, result, 0); 9641 return result; 9642 } else { 9643 throw new IllegalArgumentException( 9644 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9645 } 9646 } 9647 9648 static void toSurrogates(int codePoint, char[] dst, int index) { 9649 // We write elements "backwards" to guarantee all-or-nothing 9650 dst[index+1] = lowSurrogate(codePoint); 9651 dst[index] = highSurrogate(codePoint); 9652 } 9653 9654 /** 9655 * Returns the number of Unicode code points in the text range of 9656 * the specified char sequence. The text range begins at the 9657 * specified {@code beginIndex} and extends to the 9658 * {@code char} at index {@code endIndex - 1}. Thus the 9659 * length (in {@code char}s) of the text range is 9660 * {@code endIndex-beginIndex}. Unpaired surrogates within 9661 * the text range count as one code point each. 9662 * 9663 * @param seq the char sequence 9664 * @param beginIndex the index to the first {@code char} of 9665 * the text range. 9666 * @param endIndex the index after the last {@code char} of 9667 * the text range. 
9668 * @return the number of Unicode code points in the specified text 9669 * range 9670 * @throws NullPointerException if {@code seq} is null. 9671 * @throws IndexOutOfBoundsException if the 9672 * {@code beginIndex} is negative, or {@code endIndex} 9673 * is larger than the length of the given sequence, or 9674 * {@code beginIndex} is larger than {@code endIndex}. 9675 * @since 1.5 9676 */ 9677 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 9678 Objects.checkFromToIndex(beginIndex, endIndex, seq.length()); 9679 int n = endIndex - beginIndex; 9680 for (int i = beginIndex; i < endIndex; ) { 9681 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 9682 isLowSurrogate(seq.charAt(i))) { 9683 n--; 9684 i++; 9685 } 9686 } 9687 return n; 9688 } 9689 9690 /** 9691 * Returns the number of Unicode code points in a subarray of the 9692 * {@code char} array argument. The {@code offset} 9693 * argument is the index of the first {@code char} of the 9694 * subarray and the {@code count} argument specifies the 9695 * length of the subarray in {@code char}s. Unpaired 9696 * surrogates within the subarray count as one code point each. 9697 * 9698 * @param a the {@code char} array 9699 * @param offset the index of the first {@code char} in the 9700 * given {@code char} array 9701 * @param count the length of the subarray in {@code char}s 9702 * @return the number of Unicode code points in the specified subarray 9703 * @throws NullPointerException if {@code a} is null. 9704 * @throws IndexOutOfBoundsException if {@code offset} or 9705 * {@code count} is negative, or if {@code offset + 9706 * count} is larger than the length of the given array. 
9707 * @since 1.5 9708 */ 9709 public static int codePointCount(char[] a, int offset, int count) { 9710 Objects.checkFromIndexSize(offset, count, a.length); 9711 return codePointCountImpl(a, offset, count); 9712 } 9713 9714 static int codePointCountImpl(char[] a, int offset, int count) { 9715 int endIndex = offset + count; 9716 int n = count; 9717 for (int i = offset; i < endIndex; ) { 9718 if (isHighSurrogate(a[i++]) && i < endIndex && 9719 isLowSurrogate(a[i])) { 9720 n--; 9721 i++; 9722 } 9723 } 9724 return n; 9725 } 9726 9727 /** 9728 * Returns the index within the given char sequence that is offset 9729 * from the given {@code index} by {@code codePointOffset} 9730 * code points. Unpaired surrogates within the text range given by 9731 * {@code index} and {@code codePointOffset} count as 9732 * one code point each. 9733 * 9734 * @param seq the char sequence 9735 * @param index the index to be offset 9736 * @param codePointOffset the offset in code points 9737 * @return the index within the char sequence 9738 * @throws NullPointerException if {@code seq} is null. 9739 * @throws IndexOutOfBoundsException if {@code index} 9740 * is negative or larger than the length of the char sequence, 9741 * or if {@code codePointOffset} is positive and the 9742 * subsequence starting with {@code index} has fewer than 9743 * {@code codePointOffset} code points, or if 9744 * {@code codePointOffset} is negative and the subsequence 9745 * before {@code index} has fewer than the absolute value 9746 * of {@code codePointOffset} code points. 
9747 * @since 1.5 9748 */ 9749 public static int offsetByCodePoints(CharSequence seq, int index, 9750 int codePointOffset) { 9751 int length = seq.length(); 9752 if (index < 0 || index > length) { 9753 throw new IndexOutOfBoundsException(); 9754 } 9755 9756 int x = index; 9757 if (codePointOffset >= 0) { 9758 int i; 9759 for (i = 0; x < length && i < codePointOffset; i++) { 9760 if (isHighSurrogate(seq.charAt(x++)) && x < length && 9761 isLowSurrogate(seq.charAt(x))) { 9762 x++; 9763 } 9764 } 9765 if (i < codePointOffset) { 9766 throw new IndexOutOfBoundsException(); 9767 } 9768 } else { 9769 int i; 9770 for (i = codePointOffset; x > 0 && i < 0; i++) { 9771 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 9772 isHighSurrogate(seq.charAt(x-1))) { 9773 x--; 9774 } 9775 } 9776 if (i < 0) { 9777 throw new IndexOutOfBoundsException(); 9778 } 9779 } 9780 return x; 9781 } 9782 9783 /** 9784 * Returns the index within the given {@code char} subarray 9785 * that is offset from the given {@code index} by 9786 * {@code codePointOffset} code points. The 9787 * {@code start} and {@code count} arguments specify a 9788 * subarray of the {@code char} array. Unpaired surrogates 9789 * within the text range given by {@code index} and 9790 * {@code codePointOffset} count as one code point each. 9791 * 9792 * @param a the {@code char} array 9793 * @param start the index of the first {@code char} of the 9794 * subarray 9795 * @param count the length of the subarray in {@code char}s 9796 * @param index the index to be offset 9797 * @param codePointOffset the offset in code points 9798 * @return the index within the subarray 9799 * @throws NullPointerException if {@code a} is null. 
9800 * @throws IndexOutOfBoundsException 9801 * if {@code start} or {@code count} is negative, 9802 * or if {@code start + count} is larger than the length of 9803 * the given array, 9804 * or if {@code index} is less than {@code start} or 9805 * larger then {@code start + count}, 9806 * or if {@code codePointOffset} is positive and the text range 9807 * starting with {@code index} and ending with {@code start + count - 1} 9808 * has fewer than {@code codePointOffset} code 9809 * points, 9810 * or if {@code codePointOffset} is negative and the text range 9811 * starting with {@code start} and ending with {@code index - 1} 9812 * has fewer than the absolute value of 9813 * {@code codePointOffset} code points. 9814 * @since 1.5 9815 */ 9816 public static int offsetByCodePoints(char[] a, int start, int count, 9817 int index, int codePointOffset) { 9818 if (count > a.length-start || start < 0 || count < 0 9819 || index < start || index > start+count) { 9820 throw new IndexOutOfBoundsException(); 9821 } 9822 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 9823 } 9824 9825 static int offsetByCodePointsImpl(char[]a, int start, int count, 9826 int index, int codePointOffset) { 9827 int x = index; 9828 if (codePointOffset >= 0) { 9829 int limit = start + count; 9830 int i; 9831 for (i = 0; x < limit && i < codePointOffset; i++) { 9832 if (isHighSurrogate(a[x++]) && x < limit && 9833 isLowSurrogate(a[x])) { 9834 x++; 9835 } 9836 } 9837 if (i < codePointOffset) { 9838 throw new IndexOutOfBoundsException(); 9839 } 9840 } else { 9841 int i; 9842 for (i = codePointOffset; x > start && i < 0; i++) { 9843 if (isLowSurrogate(a[--x]) && x > start && 9844 isHighSurrogate(a[x-1])) { 9845 x--; 9846 } 9847 } 9848 if (i < 0) { 9849 throw new IndexOutOfBoundsException(); 9850 } 9851 } 9852 return x; 9853 } 9854 9855 /** 9856 * Determines if the specified character is a lowercase character. 
9857 * <p> 9858 * A character is lowercase if its general category type, provided 9859 * by {@code Character.getType(ch)}, is 9860 * {@code LOWERCASE_LETTER}, or it has contributory property 9861 * Other_Lowercase as defined by the Unicode Standard. 9862 * <p> 9863 * The following are examples of lowercase characters: 9864 * <blockquote><pre> 9865 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9866 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9867 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9868 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9869 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9870 * </pre></blockquote> 9871 * <p> Many other Unicode characters are lowercase too. 9872 * 9873 * <p><b>Note:</b> This method cannot handle <a 9874 * href="#supplementary"> supplementary characters</a>. To support 9875 * all Unicode characters, including supplementary characters, use 9876 * the {@link #isLowerCase(int)} method. 9877 * 9878 * @param ch the character to be tested. 9879 * @return {@code true} if the character is lowercase; 9880 * {@code false} otherwise. 9881 * @see Character#isLowerCase(char) 9882 * @see Character#isTitleCase(char) 9883 * @see Character#toLowerCase(char) 9884 * @see Character#getType(char) 9885 */ 9886 public static boolean isLowerCase(char ch) { 9887 return isLowerCase((int)ch); 9888 } 9889 9890 /** 9891 * Determines if the specified character (Unicode code point) is a 9892 * lowercase character. 9893 * <p> 9894 * A character is lowercase if its general category type, provided 9895 * by {@link Character#getType getType(codePoint)}, is 9896 * {@code LOWERCASE_LETTER}, or it has contributory property 9897 * Other_Lowercase as defined by the Unicode Standard. 
9898 * <p> 9899 * The following are examples of lowercase characters: 9900 * <blockquote><pre> 9901 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9902 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9903 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9904 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9905 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9906 * </pre></blockquote> 9907 * <p> Many other Unicode characters are lowercase too. 9908 * 9909 * @param codePoint the character (Unicode code point) to be tested. 9910 * @return {@code true} if the character is lowercase; 9911 * {@code false} otherwise. 9912 * @see Character#isLowerCase(int) 9913 * @see Character#isTitleCase(int) 9914 * @see Character#toLowerCase(int) 9915 * @see Character#getType(int) 9916 * @since 1.5 9917 */ 9918 public static boolean isLowerCase(int codePoint) { 9919 return CharacterData.of(codePoint).isLowerCase(codePoint); 9920 } 9921 9922 /** 9923 * Determines if the specified character is an uppercase character. 9924 * <p> 9925 * A character is uppercase if its general category type, provided by 9926 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 9927 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 9928 * <p> 9929 * The following are examples of uppercase characters: 9930 * <blockquote><pre> 9931 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 9932 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 9933 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 9934 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 9935 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 9936 * </pre></blockquote> 9937 * <p> Many other Unicode characters are uppercase too. 
9938 * 9939 * <p><b>Note:</b> This method cannot handle <a 9940 * href="#supplementary"> supplementary characters</a>. To support 9941 * all Unicode characters, including supplementary characters, use 9942 * the {@link #isUpperCase(int)} method. 9943 * 9944 * @param ch the character to be tested. 9945 * @return {@code true} if the character is uppercase; 9946 * {@code false} otherwise. 9947 * @see Character#isLowerCase(char) 9948 * @see Character#isTitleCase(char) 9949 * @see Character#toUpperCase(char) 9950 * @see Character#getType(char) 9951 * @since 1.0 9952 */ 9953 public static boolean isUpperCase(char ch) { 9954 return isUpperCase((int)ch); 9955 } 9956 9957 /** 9958 * Determines if the specified character (Unicode code point) is an uppercase character. 9959 * <p> 9960 * A character is uppercase if its general category type, provided by 9961 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 9962 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 9963 * <p> 9964 * The following are examples of uppercase characters: 9965 * <blockquote><pre> 9966 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 9967 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 9968 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 9969 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 9970 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 9971 * </pre></blockquote> 9972 * <p> Many other Unicode characters are uppercase too. 9973 * 9974 * @param codePoint the character (Unicode code point) to be tested. 9975 * @return {@code true} if the character is uppercase; 9976 * {@code false} otherwise. 
9977 * @see Character#isLowerCase(int) 9978 * @see Character#isTitleCase(int) 9979 * @see Character#toUpperCase(int) 9980 * @see Character#getType(int) 9981 * @since 1.5 9982 */ 9983 public static boolean isUpperCase(int codePoint) { 9984 return CharacterData.of(codePoint).isUpperCase(codePoint); 9985 } 9986 9987 /** 9988 * Determines if the specified character is a titlecase character. 9989 * <p> 9990 * A character is a titlecase character if its general 9991 * category type, provided by {@code Character.getType(ch)}, 9992 * is {@code TITLECASE_LETTER}. 9993 * <p> 9994 * Some characters look like pairs of Latin letters. For example, there 9995 * is an uppercase letter that looks like "LJ" and has a corresponding 9996 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 9997 * is the appropriate form to use when rendering a word in lowercase 9998 * with initial capitals, as for a book title. 9999 * <p> 10000 * These are some of the Unicode characters for which this method returns 10001 * {@code true}: 10002 * <ul> 10003 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10004 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10005 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10006 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10007 * </ul> 10008 * <p> Many other Unicode characters are titlecase too. 10009 * 10010 * <p><b>Note:</b> This method cannot handle <a 10011 * href="#supplementary"> supplementary characters</a>. To support 10012 * all Unicode characters, including supplementary characters, use 10013 * the {@link #isTitleCase(int)} method. 10014 * 10015 * @param ch the character to be tested. 10016 * @return {@code true} if the character is titlecase; 10017 * {@code false} otherwise. 
10018 * @see Character#isLowerCase(char) 10019 * @see Character#isUpperCase(char) 10020 * @see Character#toTitleCase(char) 10021 * @see Character#getType(char) 10022 * @since 1.0.2 10023 */ 10024 public static boolean isTitleCase(char ch) { 10025 return isTitleCase((int)ch); 10026 } 10027 10028 /** 10029 * Determines if the specified character (Unicode code point) is a titlecase character. 10030 * <p> 10031 * A character is a titlecase character if its general 10032 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10033 * is {@code TITLECASE_LETTER}. 10034 * <p> 10035 * Some characters look like pairs of Latin letters. For example, there 10036 * is an uppercase letter that looks like "LJ" and has a corresponding 10037 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 10038 * is the appropriate form to use when rendering a word in lowercase 10039 * with initial capitals, as for a book title. 10040 * <p> 10041 * These are some of the Unicode characters for which this method returns 10042 * {@code true}: 10043 * <ul> 10044 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10045 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10046 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10047 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10048 * </ul> 10049 * <p> Many other Unicode characters are titlecase too. 10050 * 10051 * @param codePoint the character (Unicode code point) to be tested. 10052 * @return {@code true} if the character is titlecase; 10053 * {@code false} otherwise. 10054 * @see Character#isLowerCase(int) 10055 * @see Character#isUpperCase(int) 10056 * @see Character#toTitleCase(int) 10057 * @see Character#getType(int) 10058 * @since 1.5 10059 */ 10060 public static boolean isTitleCase(int codePoint) { 10061 return getType(codePoint) == Character.TITLECASE_LETTER; 10062 } 10063 10064 /** 10065 * Determines if the specified character is a digit. 
10066 * <p> 10067 * A character is a digit if its general category type, provided 10068 * by {@code Character.getType(ch)}, is 10069 * {@code DECIMAL_DIGIT_NUMBER}. 10070 * <p> 10071 * Some Unicode character ranges that contain digits: 10072 * <ul> 10073 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10074 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10075 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10076 * Arabic-Indic digits 10077 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10078 * Extended Arabic-Indic digits 10079 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10080 * Devanagari digits 10081 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10082 * Fullwidth digits 10083 * </ul> 10084 * 10085 * Many other character ranges contain digits as well. 10086 * 10087 * <p><b>Note:</b> This method cannot handle <a 10088 * href="#supplementary"> supplementary characters</a>. To support 10089 * all Unicode characters, including supplementary characters, use 10090 * the {@link #isDigit(int)} method. 10091 * 10092 * @param ch the character to be tested. 10093 * @return {@code true} if the character is a digit; 10094 * {@code false} otherwise. 10095 * @see Character#digit(char, int) 10096 * @see Character#forDigit(int, int) 10097 * @see Character#getType(char) 10098 */ 10099 public static boolean isDigit(char ch) { 10100 return isDigit((int)ch); 10101 } 10102 10103 /** 10104 * Determines if the specified character (Unicode code point) is a digit. 10105 * <p> 10106 * A character is a digit if its general category type, provided 10107 * by {@link Character#getType(int) getType(codePoint)}, is 10108 * {@code DECIMAL_DIGIT_NUMBER}. 
10109 * <p> 10110 * Some Unicode character ranges that contain digits: 10111 * <ul> 10112 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10113 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10114 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10115 * Arabic-Indic digits 10116 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10117 * Extended Arabic-Indic digits 10118 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10119 * Devanagari digits 10120 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10121 * Fullwidth digits 10122 * </ul> 10123 * 10124 * Many other character ranges contain digits as well. 10125 * 10126 * @param codePoint the character (Unicode code point) to be tested. 10127 * @return {@code true} if the character is a digit; 10128 * {@code false} otherwise. 10129 * @see Character#forDigit(int, int) 10130 * @see Character#getType(int) 10131 * @since 1.5 10132 */ 10133 public static boolean isDigit(int codePoint) { 10134 return CharacterData.of(codePoint).isDigit(codePoint); 10135 } 10136 10137 /** 10138 * Determines if a character is defined in Unicode. 10139 * <p> 10140 * A character is defined if at least one of the following is true: 10141 * <ul> 10142 * <li>It has an entry in the UnicodeData file. 10143 * <li>It has a value in a range defined by the UnicodeData file. 10144 * </ul> 10145 * 10146 * <p><b>Note:</b> This method cannot handle <a 10147 * href="#supplementary"> supplementary characters</a>. To support 10148 * all Unicode characters, including supplementary characters, use 10149 * the {@link #isDefined(int)} method. 10150 * 10151 * @param ch the character to be tested 10152 * @return {@code true} if the character has a defined meaning 10153 * in Unicode; {@code false} otherwise. 
10154 * @see Character#isDigit(char) 10155 * @see Character#isLetter(char) 10156 * @see Character#isLetterOrDigit(char) 10157 * @see Character#isLowerCase(char) 10158 * @see Character#isTitleCase(char) 10159 * @see Character#isUpperCase(char) 10160 * @since 1.0.2 10161 */ 10162 public static boolean isDefined(char ch) { 10163 return isDefined((int)ch); 10164 } 10165 10166 /** 10167 * Determines if a character (Unicode code point) is defined in Unicode. 10168 * <p> 10169 * A character is defined if at least one of the following is true: 10170 * <ul> 10171 * <li>It has an entry in the UnicodeData file. 10172 * <li>It has a value in a range defined by the UnicodeData file. 10173 * </ul> 10174 * 10175 * @param codePoint the character (Unicode code point) to be tested. 10176 * @return {@code true} if the character has a defined meaning 10177 * in Unicode; {@code false} otherwise. 10178 * @see Character#isDigit(int) 10179 * @see Character#isLetter(int) 10180 * @see Character#isLetterOrDigit(int) 10181 * @see Character#isLowerCase(int) 10182 * @see Character#isTitleCase(int) 10183 * @see Character#isUpperCase(int) 10184 * @since 1.5 10185 */ 10186 public static boolean isDefined(int codePoint) { 10187 return getType(codePoint) != Character.UNASSIGNED; 10188 } 10189 10190 /** 10191 * Determines if the specified character is a letter. 10192 * <p> 10193 * A character is considered to be a letter if its general 10194 * category type, provided by {@code Character.getType(ch)}, 10195 * is any of the following: 10196 * <ul> 10197 * <li> {@code UPPERCASE_LETTER} 10198 * <li> {@code LOWERCASE_LETTER} 10199 * <li> {@code TITLECASE_LETTER} 10200 * <li> {@code MODIFIER_LETTER} 10201 * <li> {@code OTHER_LETTER} 10202 * </ul> 10203 * 10204 * Not all letters have case. Many characters are 10205 * letters but are neither uppercase nor lowercase nor titlecase. 10206 * 10207 * <p><b>Note:</b> This method cannot handle <a 10208 * href="#supplementary"> supplementary characters</a>. 
To support 10209 * all Unicode characters, including supplementary characters, use 10210 * the {@link #isLetter(int)} method. 10211 * 10212 * @param ch the character to be tested. 10213 * @return {@code true} if the character is a letter; 10214 * {@code false} otherwise. 10215 * @see Character#isDigit(char) 10216 * @see Character#isJavaIdentifierStart(char) 10217 * @see Character#isJavaLetter(char) 10218 * @see Character#isJavaLetterOrDigit(char) 10219 * @see Character#isLetterOrDigit(char) 10220 * @see Character#isLowerCase(char) 10221 * @see Character#isTitleCase(char) 10222 * @see Character#isUnicodeIdentifierStart(char) 10223 * @see Character#isUpperCase(char) 10224 */ 10225 public static boolean isLetter(char ch) { 10226 return isLetter((int)ch); 10227 } 10228 10229 /** 10230 * Determines if the specified character (Unicode code point) is a letter. 10231 * <p> 10232 * A character is considered to be a letter if its general 10233 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10234 * is any of the following: 10235 * <ul> 10236 * <li> {@code UPPERCASE_LETTER} 10237 * <li> {@code LOWERCASE_LETTER} 10238 * <li> {@code TITLECASE_LETTER} 10239 * <li> {@code MODIFIER_LETTER} 10240 * <li> {@code OTHER_LETTER} 10241 * </ul> 10242 * 10243 * Not all letters have case. Many characters are 10244 * letters but are neither uppercase nor lowercase nor titlecase. 10245 * 10246 * @param codePoint the character (Unicode code point) to be tested. 10247 * @return {@code true} if the character is a letter; 10248 * {@code false} otherwise. 
10249 * @see Character#isDigit(int) 10250 * @see Character#isJavaIdentifierStart(int) 10251 * @see Character#isLetterOrDigit(int) 10252 * @see Character#isLowerCase(int) 10253 * @see Character#isTitleCase(int) 10254 * @see Character#isUnicodeIdentifierStart(int) 10255 * @see Character#isUpperCase(int) 10256 * @since 1.5 10257 */ 10258 public static boolean isLetter(int codePoint) { 10259 return ((((1 << Character.UPPERCASE_LETTER) | 10260 (1 << Character.LOWERCASE_LETTER) | 10261 (1 << Character.TITLECASE_LETTER) | 10262 (1 << Character.MODIFIER_LETTER) | 10263 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 10264 != 0; 10265 } 10266 10267 /** 10268 * Determines if the specified character is a letter or digit. 10269 * <p> 10270 * A character is considered to be a letter or digit if either 10271 * {@code Character.isLetter(char ch)} or 10272 * {@code Character.isDigit(char ch)} returns 10273 * {@code true} for the character. 10274 * 10275 * <p><b>Note:</b> This method cannot handle <a 10276 * href="#supplementary"> supplementary characters</a>. To support 10277 * all Unicode characters, including supplementary characters, use 10278 * the {@link #isLetterOrDigit(int)} method. 10279 * 10280 * @param ch the character to be tested. 10281 * @return {@code true} if the character is a letter or digit; 10282 * {@code false} otherwise. 10283 * @see Character#isDigit(char) 10284 * @see Character#isJavaIdentifierPart(char) 10285 * @see Character#isJavaLetter(char) 10286 * @see Character#isJavaLetterOrDigit(char) 10287 * @see Character#isLetter(char) 10288 * @see Character#isUnicodeIdentifierPart(char) 10289 * @since 1.0.2 10290 */ 10291 public static boolean isLetterOrDigit(char ch) { 10292 return isLetterOrDigit((int)ch); 10293 } 10294 10295 /** 10296 * Determines if the specified character (Unicode code point) is a letter or digit. 
10297 * <p> 10298 * A character is considered to be a letter or digit if either 10299 * {@link #isLetter(int) isLetter(codePoint)} or 10300 * {@link #isDigit(int) isDigit(codePoint)} returns 10301 * {@code true} for the character. 10302 * 10303 * @param codePoint the character (Unicode code point) to be tested. 10304 * @return {@code true} if the character is a letter or digit; 10305 * {@code false} otherwise. 10306 * @see Character#isDigit(int) 10307 * @see Character#isJavaIdentifierPart(int) 10308 * @see Character#isLetter(int) 10309 * @see Character#isUnicodeIdentifierPart(int) 10310 * @since 1.5 10311 */ 10312 public static boolean isLetterOrDigit(int codePoint) { 10313 return ((((1 << Character.UPPERCASE_LETTER) | 10314 (1 << Character.LOWERCASE_LETTER) | 10315 (1 << Character.TITLECASE_LETTER) | 10316 (1 << Character.MODIFIER_LETTER) | 10317 (1 << Character.OTHER_LETTER) | 10318 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 10319 != 0; 10320 } 10321 10322 /** 10323 * Determines if the specified character is permissible as the first 10324 * character in a Java identifier. 10325 * <p> 10326 * A character may start a Java identifier if and only if 10327 * one of the following conditions is true: 10328 * <ul> 10329 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10330 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10331 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10332 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10333 * </ul> 10334 * 10335 * @param ch the character to be tested. 10336 * @return {@code true} if the character may start a Java 10337 * identifier; {@code false} otherwise. 
10338 * @see Character#isJavaLetterOrDigit(char) 10339 * @see Character#isJavaIdentifierStart(char) 10340 * @see Character#isJavaIdentifierPart(char) 10341 * @see Character#isLetter(char) 10342 * @see Character#isLetterOrDigit(char) 10343 * @see Character#isUnicodeIdentifierStart(char) 10344 * @since 1.0.2 10345 * @deprecated Replaced by isJavaIdentifierStart(char). 10346 */ 10347 @Deprecated(since="1.1") 10348 public static boolean isJavaLetter(char ch) { 10349 return isJavaIdentifierStart(ch); 10350 } 10351 10352 /** 10353 * Determines if the specified character may be part of a Java 10354 * identifier as other than the first character. 10355 * <p> 10356 * A character may be part of a Java identifier if and only if one 10357 * of the following conditions is true: 10358 * <ul> 10359 * <li> it is a letter 10360 * <li> it is a currency symbol (such as {@code '$'}) 10361 * <li> it is a connecting punctuation character (such as {@code '_'}) 10362 * <li> it is a digit 10363 * <li> it is a numeric letter (such as a Roman numeral character) 10364 * <li> it is a combining mark 10365 * <li> it is a non-spacing mark 10366 * <li> {@code isIdentifierIgnorable} returns 10367 * {@code true} for the character. 10368 * </ul> 10369 * 10370 * @param ch the character to be tested. 10371 * @return {@code true} if the character may be part of a 10372 * Java identifier; {@code false} otherwise. 10373 * @see Character#isJavaLetter(char) 10374 * @see Character#isJavaIdentifierStart(char) 10375 * @see Character#isJavaIdentifierPart(char) 10376 * @see Character#isLetter(char) 10377 * @see Character#isLetterOrDigit(char) 10378 * @see Character#isUnicodeIdentifierPart(char) 10379 * @see Character#isIdentifierIgnorable(char) 10380 * @since 1.0.2 10381 * @deprecated Replaced by isJavaIdentifierPart(char). 
10382 */ 10383 @Deprecated(since="1.1") 10384 public static boolean isJavaLetterOrDigit(char ch) { 10385 return isJavaIdentifierPart(ch); 10386 } 10387 10388 /** 10389 * Determines if the specified character (Unicode code point) is alphabetic. 10390 * <p> 10391 * A character is considered to be alphabetic if its general category type, 10392 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 10393 * the following: 10394 * <ul> 10395 * <li> {@code UPPERCASE_LETTER} 10396 * <li> {@code LOWERCASE_LETTER} 10397 * <li> {@code TITLECASE_LETTER} 10398 * <li> {@code MODIFIER_LETTER} 10399 * <li> {@code OTHER_LETTER} 10400 * <li> {@code LETTER_NUMBER} 10401 * </ul> 10402 * or it has contributory property Other_Alphabetic as defined by the 10403 * Unicode Standard. 10404 * 10405 * @param codePoint the character (Unicode code point) to be tested. 10406 * @return {@code true} if the character is a Unicode alphabet 10407 * character, {@code false} otherwise. 10408 * @since 1.7 10409 */ 10410 public static boolean isAlphabetic(int codePoint) { 10411 return (((((1 << Character.UPPERCASE_LETTER) | 10412 (1 << Character.LOWERCASE_LETTER) | 10413 (1 << Character.TITLECASE_LETTER) | 10414 (1 << Character.MODIFIER_LETTER) | 10415 (1 << Character.OTHER_LETTER) | 10416 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 10417 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 10418 } 10419 10420 /** 10421 * Determines if the specified character (Unicode code point) is a CJKV 10422 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 10423 * the Unicode Standard. 10424 * 10425 * @param codePoint the character (Unicode code point) to be tested. 10426 * @return {@code true} if the character is a Unicode ideograph 10427 * character, {@code false} otherwise. 
10428 * @since 1.7 10429 */ 10430 public static boolean isIdeographic(int codePoint) { 10431 return CharacterData.of(codePoint).isIdeographic(codePoint); 10432 } 10433 10434 /** 10435 * Determines if the specified character is 10436 * permissible as the first character in a Java identifier. 10437 * <p> 10438 * A character may start a Java identifier if and only if 10439 * one of the following conditions is true: 10440 * <ul> 10441 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10442 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10443 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10444 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10445 * </ul> 10446 * 10447 * <p><b>Note:</b> This method cannot handle <a 10448 * href="#supplementary"> supplementary characters</a>. To support 10449 * all Unicode characters, including supplementary characters, use 10450 * the {@link #isJavaIdentifierStart(int)} method. 10451 * 10452 * @param ch the character to be tested. 10453 * @return {@code true} if the character may start a Java identifier; 10454 * {@code false} otherwise. 10455 * @see Character#isJavaIdentifierPart(char) 10456 * @see Character#isLetter(char) 10457 * @see Character#isUnicodeIdentifierStart(char) 10458 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10459 * @since 1.1 10460 */ 10461 @SuppressWarnings("doclint:reference") // cross-module links 10462 public static boolean isJavaIdentifierStart(char ch) { 10463 return isJavaIdentifierStart((int)ch); 10464 } 10465 10466 /** 10467 * Determines if the character (Unicode code point) is 10468 * permissible as the first character in a Java identifier. 
10469 * <p> 10470 * A character may start a Java identifier if and only if 10471 * one of the following conditions is true: 10472 * <ul> 10473 * <li> {@link #isLetter(int) isLetter(codePoint)} 10474 * returns {@code true} 10475 * <li> {@link #getType(int) getType(codePoint)} 10476 * returns {@code LETTER_NUMBER} 10477 * <li> the referenced character is a currency symbol (such as {@code '$'}) 10478 * <li> the referenced character is a connecting punctuation character 10479 * (such as {@code '_'}). 10480 * </ul> 10481 * 10482 * @param codePoint the character (Unicode code point) to be tested. 10483 * @return {@code true} if the character may start a Java identifier; 10484 * {@code false} otherwise. 10485 * @see Character#isJavaIdentifierPart(int) 10486 * @see Character#isLetter(int) 10487 * @see Character#isUnicodeIdentifierStart(int) 10488 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10489 * @since 1.5 10490 */ 10491 @SuppressWarnings("doclint:reference") // cross-module links 10492 public static boolean isJavaIdentifierStart(int codePoint) { 10493 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 10494 } 10495 10496 /** 10497 * Determines if the specified character may be part of a Java 10498 * identifier as other than the first character. 
10499 * <p> 10500 * A character may be part of a Java identifier if any of the following 10501 * conditions are true: 10502 * <ul> 10503 * <li> it is a letter 10504 * <li> it is a currency symbol (such as {@code '$'}) 10505 * <li> it is a connecting punctuation character (such as {@code '_'}) 10506 * <li> it is a digit 10507 * <li> it is a numeric letter (such as a Roman numeral character) 10508 * <li> it is a combining mark 10509 * <li> it is a non-spacing mark 10510 * <li> {@code isIdentifierIgnorable} returns 10511 * {@code true} for the character 10512 * </ul> 10513 * 10514 * <p><b>Note:</b> This method cannot handle <a 10515 * href="#supplementary"> supplementary characters</a>. To support 10516 * all Unicode characters, including supplementary characters, use 10517 * the {@link #isJavaIdentifierPart(int)} method. 10518 * 10519 * @param ch the character to be tested. 10520 * @return {@code true} if the character may be part of a 10521 * Java identifier; {@code false} otherwise. 10522 * @see Character#isIdentifierIgnorable(char) 10523 * @see Character#isJavaIdentifierStart(char) 10524 * @see Character#isLetterOrDigit(char) 10525 * @see Character#isUnicodeIdentifierPart(char) 10526 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10527 * @since 1.1 10528 */ 10529 @SuppressWarnings("doclint:reference") // cross-module links 10530 public static boolean isJavaIdentifierPart(char ch) { 10531 return isJavaIdentifierPart((int)ch); 10532 } 10533 10534 /** 10535 * Determines if the character (Unicode code point) may be part of a Java 10536 * identifier as other than the first character. 
10537 * <p> 10538 * A character may be part of a Java identifier if any of the following 10539 * conditions are true: 10540 * <ul> 10541 * <li> it is a letter 10542 * <li> it is a currency symbol (such as {@code '$'}) 10543 * <li> it is a connecting punctuation character (such as {@code '_'}) 10544 * <li> it is a digit 10545 * <li> it is a numeric letter (such as a Roman numeral character) 10546 * <li> it is a combining mark 10547 * <li> it is a non-spacing mark 10548 * <li> {@link #isIdentifierIgnorable(int) 10549 * isIdentifierIgnorable(codePoint)} returns {@code true} for 10550 * the code point 10551 * </ul> 10552 * 10553 * @param codePoint the character (Unicode code point) to be tested. 10554 * @return {@code true} if the character may be part of a 10555 * Java identifier; {@code false} otherwise. 10556 * @see Character#isIdentifierIgnorable(int) 10557 * @see Character#isJavaIdentifierStart(int) 10558 * @see Character#isLetterOrDigit(int) 10559 * @see Character#isUnicodeIdentifierPart(int) 10560 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10561 * @since 1.5 10562 */ 10563 @SuppressWarnings("doclint:reference") // cross-module links 10564 public static boolean isJavaIdentifierPart(int codePoint) { 10565 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 10566 } 10567 10568 /** 10569 * Determines if the specified character is permissible as the 10570 * first character in a Unicode identifier. 10571 * <p> 10572 * A character may start a Unicode identifier if and only if 10573 * one of the following conditions is true: 10574 * <ul> 10575 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10576 * <li> {@link #getType(char) getType(ch)} returns 10577 * {@code LETTER_NUMBER}. 10578 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10579 * {@code Other_ID_Start}</a> character. 
10580 * </ul> 10581 * <p> 10582 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10583 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10584 * with the following profile of UAX31: 10585 * <pre> 10586 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10587 * </pre> 10588 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10589 * compatibility. 10590 * 10591 * <p><b>Note:</b> This method cannot handle <a 10592 * href="#supplementary"> supplementary characters</a>. To support 10593 * all Unicode characters, including supplementary characters, use 10594 * the {@link #isUnicodeIdentifierStart(int)} method. 10595 * 10596 * @param ch the character to be tested. 10597 * @return {@code true} if the character may start a Unicode 10598 * identifier; {@code false} otherwise. 10599 * 10600 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10601 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10602 * @see Character#isJavaIdentifierStart(char) 10603 * @see Character#isLetter(char) 10604 * @see Character#isUnicodeIdentifierPart(char) 10605 * @since 1.1 10606 */ 10607 public static boolean isUnicodeIdentifierStart(char ch) { 10608 return isUnicodeIdentifierStart((int)ch); 10609 } 10610 10611 /** 10612 * Determines if the specified character (Unicode code point) is permissible as the 10613 * first character in a Unicode identifier. 10614 * <p> 10615 * A character may start a Unicode identifier if and only if 10616 * one of the following conditions is true: 10617 * <ul> 10618 * <li> {@link #isLetter(int) isLetter(codePoint)} 10619 * returns {@code true} 10620 * <li> {@link #getType(int) getType(codePoint)} 10621 * returns {@code LETTER_NUMBER}. 10622 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10623 * {@code Other_ID_Start}</a> character. 
10624 * </ul> 10625 * <p> 10626 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10627 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10628 * with the following profile of UAX31: 10629 * <pre> 10630 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10631 * </pre> 10632 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10633 * compatibility. 10634 * 10635 * @param codePoint the character (Unicode code point) to be tested. 10636 * @return {@code true} if the character may start a Unicode 10637 * identifier; {@code false} otherwise. 10638 * 10639 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10640 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10641 * @see Character#isJavaIdentifierStart(int) 10642 * @see Character#isLetter(int) 10643 * @see Character#isUnicodeIdentifierPart(int) 10644 * @since 1.5 10645 */ 10646 public static boolean isUnicodeIdentifierStart(int codePoint) { 10647 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 10648 } 10649 10650 /** 10651 * Determines if the specified character may be part of a Unicode 10652 * identifier as other than the first character. 10653 * <p> 10654 * A character may be part of a Unicode identifier if and only if 10655 * one of the following statements is true: 10656 * <ul> 10657 * <li> it is a letter 10658 * <li> it is a connecting punctuation character (such as {@code '_'}) 10659 * <li> it is a digit 10660 * <li> it is a numeric letter (such as a Roman numeral character) 10661 * <li> it is a combining mark 10662 * <li> it is a non-spacing mark 10663 * <li> {@code isIdentifierIgnorable} returns 10664 * {@code true} for this character. 10665 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10666 * {@code Other_ID_Start}</a> character. 
10667 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10668 * {@code Other_ID_Continue}</a> character. 10669 * </ul> 10670 * <p> 10671 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10672 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10673 * with the following profile of UAX31: 10674 * <pre> 10675 * Continue := Start + ID_Continue + ignorable 10676 * Medial := empty 10677 * ignorable := isIdentifierIgnorable(char) returns true for the character 10678 * </pre> 10679 * {@code ignorable} is added to {@code Continue} for backward 10680 * compatibility. 10681 * 10682 * <p><b>Note:</b> This method cannot handle <a 10683 * href="#supplementary"> supplementary characters</a>. To support 10684 * all Unicode characters, including supplementary characters, use 10685 * the {@link #isUnicodeIdentifierPart(int)} method. 10686 * 10687 * @param ch the character to be tested. 10688 * @return {@code true} if the character may be part of a 10689 * Unicode identifier; {@code false} otherwise. 10690 * 10691 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10692 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10693 * @see Character#isIdentifierIgnorable(char) 10694 * @see Character#isJavaIdentifierPart(char) 10695 * @see Character#isLetterOrDigit(char) 10696 * @see Character#isUnicodeIdentifierStart(char) 10697 * @since 1.1 10698 */ 10699 public static boolean isUnicodeIdentifierPart(char ch) { 10700 return isUnicodeIdentifierPart((int)ch); 10701 } 10702 10703 /** 10704 * Determines if the specified character (Unicode code point) may be part of a Unicode 10705 * identifier as other than the first character. 
10706 * <p> 10707 * A character may be part of a Unicode identifier if and only if 10708 * one of the following statements is true: 10709 * <ul> 10710 * <li> it is a letter 10711 * <li> it is a connecting punctuation character (such as {@code '_'}) 10712 * <li> it is a digit 10713 * <li> it is a numeric letter (such as a Roman numeral character) 10714 * <li> it is a combining mark 10715 * <li> it is a non-spacing mark 10716 * <li> {@code isIdentifierIgnorable} returns 10717 * {@code true} for this character. 10718 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10719 * {@code Other_ID_Start}</a> character. 10720 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10721 * {@code Other_ID_Continue}</a> character. 10722 * </ul> 10723 * <p> 10724 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10725 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10726 * with the following profile of UAX31: 10727 * <pre> 10728 * Continue := Start + ID_Continue + ignorable 10729 * Medial := empty 10730 * ignorable := isIdentifierIgnorable(int) returns true for the character 10731 * </pre> 10732 * {@code ignorable} is added to {@code Continue} for backward 10733 * compatibility. 10734 * 10735 * @param codePoint the character (Unicode code point) to be tested. 10736 * @return {@code true} if the character may be part of a 10737 * Unicode identifier; {@code false} otherwise. 
10738 * 10739 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10740 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10741 * @see Character#isIdentifierIgnorable(int) 10742 * @see Character#isJavaIdentifierPart(int) 10743 * @see Character#isLetterOrDigit(int) 10744 * @see Character#isUnicodeIdentifierStart(int) 10745 * @since 1.5 10746 */ 10747 public static boolean isUnicodeIdentifierPart(int codePoint) { 10748 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 10749 } 10750 10751 /** 10752 * Determines if the specified character should be regarded as 10753 * an ignorable character in a Java identifier or a Unicode identifier. 10754 * <p> 10755 * The following Unicode characters are ignorable in a Java identifier 10756 * or a Unicode identifier: 10757 * <ul> 10758 * <li>ISO control characters that are not whitespace 10759 * <ul> 10760 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 10761 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 10762 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 10763 * </ul> 10764 * 10765 * <li>all characters that have the {@code FORMAT} general 10766 * category value 10767 * </ul> 10768 * 10769 * <p><b>Note:</b> This method cannot handle <a 10770 * href="#supplementary"> supplementary characters</a>. To support 10771 * all Unicode characters, including supplementary characters, use 10772 * the {@link #isIdentifierIgnorable(int)} method. 10773 * 10774 * @param ch the character to be tested. 10775 * @return {@code true} if the character is an ignorable control 10776 * character that may be part of a Java or Unicode identifier; 10777 * {@code false} otherwise. 
10778 * @see Character#isJavaIdentifierPart(char) 10779 * @see Character#isUnicodeIdentifierPart(char) 10780 * @since 1.1 10781 */ 10782 public static boolean isIdentifierIgnorable(char ch) { 10783 return isIdentifierIgnorable((int)ch); 10784 } 10785 10786 /** 10787 * Determines if the specified character (Unicode code point) should be regarded as 10788 * an ignorable character in a Java identifier or a Unicode identifier. 10789 * <p> 10790 * The following Unicode characters are ignorable in a Java identifier 10791 * or a Unicode identifier: 10792 * <ul> 10793 * <li>ISO control characters that are not whitespace 10794 * <ul> 10795 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 10796 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 10797 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 10798 * </ul> 10799 * 10800 * <li>all characters that have the {@code FORMAT} general 10801 * category value 10802 * </ul> 10803 * 10804 * @param codePoint the character (Unicode code point) to be tested. 10805 * @return {@code true} if the character is an ignorable control 10806 * character that may be part of a Java or Unicode identifier; 10807 * {@code false} otherwise. 10808 * @see Character#isJavaIdentifierPart(int) 10809 * @see Character#isUnicodeIdentifierPart(int) 10810 * @since 1.5 10811 */ 10812 public static boolean isIdentifierIgnorable(int codePoint) { 10813 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 10814 } 10815 10816 /** 10817 * Determines if the specified character (Unicode code point) is an Emoji. 10818 * <p> 10819 * A character is considered to be an Emoji if and only if it has the {@code Emoji} 10820 * property, defined in 10821 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10822 * Unicode Emoji (Technical Standard #51)</a>. 10823 * 10824 * @param codePoint the character (Unicode code point) to be tested. 
10825 * @return {@code true} if the character is an Emoji; 10826 * {@code false} otherwise. 10827 * @since 21 10828 */ 10829 public static boolean isEmoji(int codePoint) { 10830 return CharacterData.of(codePoint).isEmoji(codePoint); 10831 } 10832 10833 /** 10834 * Determines if the specified character (Unicode code point) has the 10835 * Emoji Presentation property by default. 10836 * <p> 10837 * A character is considered to have the Emoji Presentation property if and 10838 * only if it has the {@code Emoji_Presentation} property, defined in 10839 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10840 * Unicode Emoji (Technical Standard #51)</a>. 10841 * 10842 * @param codePoint the character (Unicode code point) to be tested. 10843 * @return {@code true} if the character has the Emoji Presentation 10844 * property; {@code false} otherwise. 10845 * @since 21 10846 */ 10847 public static boolean isEmojiPresentation(int codePoint) { 10848 return CharacterData.of(codePoint).isEmojiPresentation(codePoint); 10849 } 10850 10851 /** 10852 * Determines if the specified character (Unicode code point) is an 10853 * Emoji Modifier. 10854 * <p> 10855 * A character is considered to be an Emoji Modifier if and only if it has 10856 * the {@code Emoji_Modifier} property, defined in 10857 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10858 * Unicode Emoji (Technical Standard #51)</a>. 10859 * 10860 * @param codePoint the character (Unicode code point) to be tested. 10861 * @return {@code true} if the character is an Emoji Modifier; 10862 * {@code false} otherwise. 10863 * @since 21 10864 */ 10865 public static boolean isEmojiModifier(int codePoint) { 10866 return CharacterData.of(codePoint).isEmojiModifier(codePoint); 10867 } 10868 10869 /** 10870 * Determines if the specified character (Unicode code point) is an 10871 * Emoji Modifier Base. 
10872 * <p> 10873 * A character is considered to be an Emoji Modifier Base if and only if it has 10874 * the {@code Emoji_Modifier_Base} property, defined in 10875 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10876 * Unicode Emoji (Technical Standard #51)</a>. 10877 * 10878 * @param codePoint the character (Unicode code point) to be tested. 10879 * @return {@code true} if the character is an Emoji Modifier Base; 10880 * {@code false} otherwise. 10881 * @since 21 10882 */ 10883 public static boolean isEmojiModifierBase(int codePoint) { 10884 return CharacterData.of(codePoint).isEmojiModifierBase(codePoint); 10885 } 10886 10887 /** 10888 * Determines if the specified character (Unicode code point) is an 10889 * Emoji Component. 10890 * <p> 10891 * A character is considered to be an Emoji Component if and only if it has 10892 * the {@code Emoji_Component} property, defined in 10893 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10894 * Unicode Emoji (Technical Standard #51)</a>. 10895 * 10896 * @param codePoint the character (Unicode code point) to be tested. 10897 * @return {@code true} if the character is an Emoji Component; 10898 * {@code false} otherwise. 10899 * @since 21 10900 */ 10901 public static boolean isEmojiComponent(int codePoint) { 10902 return CharacterData.of(codePoint).isEmojiComponent(codePoint); 10903 } 10904 10905 /** 10906 * Determines if the specified character (Unicode code point) is 10907 * an Extended Pictographic. 10908 * <p> 10909 * A character is considered to be an Extended Pictographic if and only if it has 10910 * the {@code Extended_Pictographic} property, defined in 10911 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10912 * Unicode Emoji (Technical Standard #51)</a>. 10913 * 10914 * @param codePoint the character (Unicode code point) to be tested. 
10915 * @return {@code true} if the character is an Extended Pictographic; 10916 * {@code false} otherwise. 10917 * @since 21 10918 */ 10919 public static boolean isExtendedPictographic(int codePoint) { 10920 return CharacterData.of(codePoint).isExtendedPictographic(codePoint); 10921 } 10922 10923 /** 10924 * Converts the character argument to lowercase using case 10925 * mapping information from the UnicodeData file. 10926 * <p> 10927 * Note that 10928 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 10929 * does not always return {@code true} for some ranges of 10930 * characters, particularly those that are symbols or ideographs. 10931 * 10932 * <p>In general, {@link String#toLowerCase()} should be used to map 10933 * characters to lowercase. {@code String} case mapping methods 10934 * have several benefits over {@code Character} case mapping methods. 10935 * {@code String} case mapping methods can perform locale-sensitive 10936 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10937 * the {@code Character} case mapping methods cannot. 10938 * 10939 * <p><b>Note:</b> This method cannot handle <a 10940 * href="#supplementary"> supplementary characters</a>. To support 10941 * all Unicode characters, including supplementary characters, use 10942 * the {@link #toLowerCase(int)} method. 10943 * 10944 * @param ch the character to be converted. 10945 * @return the lowercase equivalent of the character, if any; 10946 * otherwise, the character itself. 10947 * @see Character#isLowerCase(char) 10948 * @see String#toLowerCase() 10949 */ 10950 public static char toLowerCase(char ch) { 10951 return (char)toLowerCase((int)ch); 10952 } 10953 10954 /** 10955 * Converts the character (Unicode code point) argument to 10956 * lowercase using case mapping information from the UnicodeData 10957 * file. 
10958 * 10959 * <p> Note that 10960 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 10961 * does not always return {@code true} for some ranges of 10962 * characters, particularly those that are symbols or ideographs. 10963 * 10964 * <p>In general, {@link String#toLowerCase()} should be used to map 10965 * characters to lowercase. {@code String} case mapping methods 10966 * have several benefits over {@code Character} case mapping methods. 10967 * {@code String} case mapping methods can perform locale-sensitive 10968 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10969 * the {@code Character} case mapping methods cannot. 10970 * 10971 * @param codePoint the character (Unicode code point) to be converted. 10972 * @return the lowercase equivalent of the character (Unicode code 10973 * point), if any; otherwise, the character itself. 10974 * @see Character#isLowerCase(int) 10975 * @see String#toLowerCase() 10976 * 10977 * @since 1.5 10978 */ 10979 public static int toLowerCase(int codePoint) { 10980 return CharacterData.of(codePoint).toLowerCase(codePoint); 10981 } 10982 10983 /** 10984 * Converts the character argument to uppercase using case mapping 10985 * information from the UnicodeData file. 10986 * <p> 10987 * Note that 10988 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 10989 * does not always return {@code true} for some ranges of 10990 * characters, particularly those that are symbols or ideographs. 10991 * 10992 * <p>In general, {@link String#toUpperCase()} should be used to map 10993 * characters to uppercase. {@code String} case mapping methods 10994 * have several benefits over {@code Character} case mapping methods. 10995 * {@code String} case mapping methods can perform locale-sensitive 10996 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10997 * the {@code Character} case mapping methods cannot. 
10998 * 10999 * <p><b>Note:</b> This method cannot handle <a 11000 * href="#supplementary"> supplementary characters</a>. To support 11001 * all Unicode characters, including supplementary characters, use 11002 * the {@link #toUpperCase(int)} method. 11003 * 11004 * @param ch the character to be converted. 11005 * @return the uppercase equivalent of the character, if any; 11006 * otherwise, the character itself. 11007 * @see Character#isUpperCase(char) 11008 * @see String#toUpperCase() 11009 */ 11010 public static char toUpperCase(char ch) { 11011 return (char)toUpperCase((int)ch); 11012 } 11013 11014 /** 11015 * Converts the character (Unicode code point) argument to 11016 * uppercase using case mapping information from the UnicodeData 11017 * file. 11018 * 11019 * <p>Note that 11020 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 11021 * does not always return {@code true} for some ranges of 11022 * characters, particularly those that are symbols or ideographs. 11023 * 11024 * <p>In general, {@link String#toUpperCase()} should be used to map 11025 * characters to uppercase. {@code String} case mapping methods 11026 * have several benefits over {@code Character} case mapping methods. 11027 * {@code String} case mapping methods can perform locale-sensitive 11028 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11029 * the {@code Character} case mapping methods cannot. 11030 * 11031 * @param codePoint the character (Unicode code point) to be converted. 11032 * @return the uppercase equivalent of the character, if any; 11033 * otherwise, the character itself. 11034 * @see Character#isUpperCase(int) 11035 * @see String#toUpperCase() 11036 * 11037 * @since 1.5 11038 */ 11039 public static int toUpperCase(int codePoint) { 11040 return CharacterData.of(codePoint).toUpperCase(codePoint); 11041 } 11042 11043 /** 11044 * Converts the character argument to titlecase using case mapping 11045 * information from the UnicodeData file. 
If a character has no 11046 * explicit titlecase mapping and is not itself a titlecase char 11047 * according to UnicodeData, then the uppercase mapping is 11048 * returned as an equivalent titlecase mapping. If the 11049 * {@code char} argument is already a titlecase 11050 * {@code char}, the same {@code char} value will be 11051 * returned. 11052 * <p> 11053 * Note that 11054 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 11055 * does not always return {@code true} for some ranges of 11056 * characters. 11057 * 11058 * <p><b>Note:</b> This method cannot handle <a 11059 * href="#supplementary"> supplementary characters</a>. To support 11060 * all Unicode characters, including supplementary characters, use 11061 * the {@link #toTitleCase(int)} method. 11062 * 11063 * @param ch the character to be converted. 11064 * @return the titlecase equivalent of the character, if any; 11065 * otherwise, the character itself. 11066 * @see Character#isTitleCase(char) 11067 * @see Character#toLowerCase(char) 11068 * @see Character#toUpperCase(char) 11069 * @since 1.0.2 11070 */ 11071 public static char toTitleCase(char ch) { 11072 return (char)toTitleCase((int)ch); 11073 } 11074 11075 /** 11076 * Converts the character (Unicode code point) argument to titlecase using case mapping 11077 * information from the UnicodeData file. If a character has no 11078 * explicit titlecase mapping and is not itself a titlecase char 11079 * according to UnicodeData, then the uppercase mapping is 11080 * returned as an equivalent titlecase mapping. If the 11081 * character argument is already a titlecase 11082 * character, the same character value will be 11083 * returned. 11084 * 11085 * <p>Note that 11086 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 11087 * does not always return {@code true} for some ranges of 11088 * characters. 11089 * 11090 * @param codePoint the character (Unicode code point) to be converted. 
11091 * @return the titlecase equivalent of the character, if any; 11092 * otherwise, the character itself. 11093 * @see Character#isTitleCase(int) 11094 * @see Character#toLowerCase(int) 11095 * @see Character#toUpperCase(int) 11096 * @since 1.5 11097 */ 11098 public static int toTitleCase(int codePoint) { 11099 return CharacterData.of(codePoint).toTitleCase(codePoint); 11100 } 11101 11102 /** 11103 * Returns the numeric value of the character {@code ch} in the 11104 * specified radix. 11105 * <p> 11106 * If the radix is not in the range {@code MIN_RADIX} ≤ 11107 * {@code radix} ≤ {@code MAX_RADIX} or if the 11108 * value of {@code ch} is not a valid digit in the specified 11109 * radix, {@code -1} is returned. A character is a valid digit 11110 * if at least one of the following is true: 11111 * <ul> 11112 * <li>The method {@code isDigit} is {@code true} of the character 11113 * and the Unicode decimal digit value of the character (or its 11114 * single-character decomposition) is less than the specified radix. 11115 * In this case the decimal digit value is returned. 11116 * <li>The character is one of the uppercase Latin letters 11117 * {@code 'A'} through {@code 'Z'} and its code is less than 11118 * {@code radix + 'A' - 10}. 11119 * In this case, {@code ch - 'A' + 10} 11120 * is returned. 11121 * <li>The character is one of the lowercase Latin letters 11122 * {@code 'a'} through {@code 'z'} and its code is less than 11123 * {@code radix + 'a' - 10}. 11124 * In this case, {@code ch - 'a' + 10} 11125 * is returned. 11126 * <li>The character is one of the fullwidth uppercase Latin letters A 11127 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11128 * and its code is less than 11129 * {@code radix + '\u005CuFF21' - 10}. 11130 * In this case, {@code ch - '\u005CuFF21' + 10} 11131 * is returned. 
11132 * <li>The character is one of the fullwidth lowercase Latin letters a 11133 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11134 * and its code is less than 11135 * {@code radix + '\u005CuFF41' - 10}. 11136 * In this case, {@code ch - '\u005CuFF41' + 10} 11137 * is returned. 11138 * </ul> 11139 * 11140 * <p><b>Note:</b> This method cannot handle <a 11141 * href="#supplementary"> supplementary characters</a>. To support 11142 * all Unicode characters, including supplementary characters, use 11143 * the {@link #digit(int, int)} method. 11144 * 11145 * @param ch the character to be converted. 11146 * @param radix the radix. 11147 * @return the numeric value represented by the character in the 11148 * specified radix. 11149 * @see Character#forDigit(int, int) 11150 * @see Character#isDigit(char) 11151 */ 11152 public static int digit(char ch, int radix) { 11153 return digit((int)ch, radix); 11154 } 11155 11156 /** 11157 * Returns the numeric value of the specified character (Unicode 11158 * code point) in the specified radix. 11159 * 11160 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 11161 * {@code radix} ≤ {@code MAX_RADIX} or if the 11162 * character is not a valid digit in the specified 11163 * radix, {@code -1} is returned. A character is a valid digit 11164 * if at least one of the following is true: 11165 * <ul> 11166 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 11167 * and the Unicode decimal digit value of the character (or its 11168 * single-character decomposition) is less than the specified radix. 11169 * In this case the decimal digit value is returned. 11170 * <li>The character is one of the uppercase Latin letters 11171 * {@code 'A'} through {@code 'Z'} and its code is less than 11172 * {@code radix + 'A' - 10}. 11173 * In this case, {@code codePoint - 'A' + 10} 11174 * is returned. 
11175 * <li>The character is one of the lowercase Latin letters 11176 * {@code 'a'} through {@code 'z'} and its code is less than 11177 * {@code radix + 'a' - 10}. 11178 * In this case, {@code codePoint - 'a' + 10} 11179 * is returned. 11180 * <li>The character is one of the fullwidth uppercase Latin letters A 11181 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11182 * and its code is less than 11183 * {@code radix + '\u005CuFF21' - 10}. 11184 * In this case, 11185 * {@code codePoint - '\u005CuFF21' + 10} 11186 * is returned. 11187 * <li>The character is one of the fullwidth lowercase Latin letters a 11188 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11189 * and its code is less than 11190 * {@code radix + '\u005CuFF41'- 10}. 11191 * In this case, 11192 * {@code codePoint - '\u005CuFF41' + 10} 11193 * is returned. 11194 * </ul> 11195 * 11196 * @param codePoint the character (Unicode code point) to be converted. 11197 * @param radix the radix. 11198 * @return the numeric value represented by the character in the 11199 * specified radix. 11200 * @see Character#forDigit(int, int) 11201 * @see Character#isDigit(int) 11202 * @since 1.5 11203 */ 11204 public static int digit(int codePoint, int radix) { 11205 return CharacterData.of(codePoint).digit(codePoint, radix); 11206 } 11207 11208 /** 11209 * Returns the {@code int} value that the specified Unicode 11210 * character represents. For example, the character 11211 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 11212 * an int with a value of 50. 11213 * <p> 11214 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11215 * {@code '\u005Cu005A'}), lowercase 11216 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11217 * full width variant ({@code '\u005CuFF21'} through 11218 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11219 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11220 * through 35. 
This is independent of the Unicode specification, 11221 * which does not assign numeric values to these {@code char} 11222 * values. 11223 * <p> 11224 * If the character does not have a numeric value, then -1 is returned. 11225 * If the character has a numeric value that cannot be represented as a 11226 * nonnegative integer (for example, a fractional value), then -2 11227 * is returned. 11228 * 11229 * <p><b>Note:</b> This method cannot handle <a 11230 * href="#supplementary"> supplementary characters</a>. To support 11231 * all Unicode characters, including supplementary characters, use 11232 * the {@link #getNumericValue(int)} method. 11233 * 11234 * @param ch the character to be converted. 11235 * @return the numeric value of the character, as a nonnegative {@code int} 11236 * value; -2 if the character has a numeric value but the value 11237 * can not be represented as a nonnegative {@code int} value; 11238 * -1 if the character has no numeric value. 11239 * @see Character#forDigit(int, int) 11240 * @see Character#isDigit(char) 11241 * @since 1.1 11242 */ 11243 public static int getNumericValue(char ch) { 11244 return getNumericValue((int)ch); 11245 } 11246 11247 /** 11248 * Returns the {@code int} value that the specified 11249 * character (Unicode code point) represents. For example, the character 11250 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 11251 * an {@code int} with a value of 50. 11252 * <p> 11253 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11254 * {@code '\u005Cu005A'}), lowercase 11255 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11256 * full width variant ({@code '\u005CuFF21'} through 11257 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11258 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11259 * through 35. This is independent of the Unicode specification, 11260 * which does not assign numeric values to these {@code char} 11261 * values. 
11262 * <p> 11263 * If the character does not have a numeric value, then -1 is returned. 11264 * If the character has a numeric value that cannot be represented as a 11265 * nonnegative integer (for example, a fractional value), then -2 11266 * is returned. 11267 * 11268 * @param codePoint the character (Unicode code point) to be converted. 11269 * @return the numeric value of the character, as a nonnegative {@code int} 11270 * value; -2 if the character has a numeric value but the value 11271 * can not be represented as a nonnegative {@code int} value; 11272 * -1 if the character has no numeric value. 11273 * @see Character#forDigit(int, int) 11274 * @see Character#isDigit(int) 11275 * @since 1.5 11276 */ 11277 public static int getNumericValue(int codePoint) { 11278 return CharacterData.of(codePoint).getNumericValue(codePoint); 11279 } 11280 11281 /** 11282 * Determines if the specified character is ISO-LATIN-1 white space. 11283 * This method returns {@code true} for the following five 11284 * characters only: 11285 * <table class="striped"> 11286 * <caption style="display:none">truechars</caption> 11287 * <thead> 11288 * <tr><th scope="col">Character 11289 * <th scope="col">Code 11290 * <th scope="col">Name 11291 * </thead> 11292 * <tbody> 11293 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td> 11294 * <td>{@code HORIZONTAL TABULATION}</td></tr> 11295 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td> 11296 * <td>{@code NEW LINE}</td></tr> 11297 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td> 11298 * <td>{@code FORM FEED}</td></tr> 11299 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td> 11300 * <td>{@code CARRIAGE RETURN}</td></tr> 11301 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td> 11302 * <td>{@code SPACE}</td></tr> 11303 * </tbody> 11304 * </table> 11305 * 11306 * @param ch the character to be tested. 
11307 * @return {@code true} if the character is ISO-LATIN-1 white 11308 * space; {@code false} otherwise. 11309 * @see Character#isSpaceChar(char) 11310 * @see Character#isWhitespace(char) 11311 * @deprecated Replaced by isWhitespace(char). 11312 */ 11313 @Deprecated(since="1.1") 11314 public static boolean isSpace(char ch) { 11315 return (ch <= 0x0020) && 11316 (((((1L << 0x0009) | 11317 (1L << 0x000A) | 11318 (1L << 0x000C) | 11319 (1L << 0x000D) | 11320 (1L << 0x0020)) >> ch) & 1L) != 0); 11321 } 11322 11323 11324 /** 11325 * Determines if the specified character is a Unicode space character. 11326 * A character is considered to be a space character if and only if 11327 * it is specified to be a space character by the Unicode Standard. This 11328 * method returns true if the character's general category type is any of 11329 * the following: 11330 * <ul> 11331 * <li> {@code SPACE_SEPARATOR} 11332 * <li> {@code LINE_SEPARATOR} 11333 * <li> {@code PARAGRAPH_SEPARATOR} 11334 * </ul> 11335 * 11336 * <p><b>Note:</b> This method cannot handle <a 11337 * href="#supplementary"> supplementary characters</a>. To support 11338 * all Unicode characters, including supplementary characters, use 11339 * the {@link #isSpaceChar(int)} method. 11340 * 11341 * @param ch the character to be tested. 11342 * @return {@code true} if the character is a space character; 11343 * {@code false} otherwise. 11344 * @see Character#isWhitespace(char) 11345 * @since 1.1 11346 */ 11347 public static boolean isSpaceChar(char ch) { 11348 return isSpaceChar((int)ch); 11349 } 11350 11351 /** 11352 * Determines if the specified character (Unicode code point) is a 11353 * Unicode space character. A character is considered to be a 11354 * space character if and only if it is specified to be a space 11355 * character by the Unicode Standard. 
This method returns true if 11356 * the character's general category type is any of the following: 11357 * 11358 * <ul> 11359 * <li> {@link #SPACE_SEPARATOR} 11360 * <li> {@link #LINE_SEPARATOR} 11361 * <li> {@link #PARAGRAPH_SEPARATOR} 11362 * </ul> 11363 * 11364 * @param codePoint the character (Unicode code point) to be tested. 11365 * @return {@code true} if the character is a space character; 11366 * {@code false} otherwise. 11367 * @see Character#isWhitespace(int) 11368 * @since 1.5 11369 */ 11370 public static boolean isSpaceChar(int codePoint) { 11371 return ((((1 << Character.SPACE_SEPARATOR) | 11372 (1 << Character.LINE_SEPARATOR) | 11373 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 11374 != 0; 11375 } 11376 11377 /** 11378 * Determines if the specified character is white space according to Java. 11379 * A character is a Java whitespace character if and only if it satisfies 11380 * one of the following criteria: 11381 * <ul> 11382 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 11383 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 11384 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11385 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11386 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11387 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11388 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11389 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11390 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11391 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11392 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11393 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11394 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11395 * </ul> 11396 * 11397 * <p><b>Note:</b> This method cannot handle <a 11398 * href="#supplementary"> supplementary characters</a>. 
To support 11399 * all Unicode characters, including supplementary characters, use 11400 * the {@link #isWhitespace(int)} method. 11401 * 11402 * @param ch the character to be tested. 11403 * @return {@code true} if the character is a Java whitespace 11404 * character; {@code false} otherwise. 11405 * @see Character#isSpaceChar(char) 11406 * @since 1.1 11407 */ 11408 public static boolean isWhitespace(char ch) { 11409 return isWhitespace((int)ch); 11410 } 11411 11412 /** 11413 * Determines if the specified character (Unicode code point) is 11414 * white space according to Java. A character is a Java 11415 * whitespace character if and only if it satisfies one of the 11416 * following criteria: 11417 * <ul> 11418 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 11419 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 11420 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11421 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11422 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11423 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11424 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11425 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11426 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11427 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11428 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11429 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11430 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11431 * </ul> 11432 * 11433 * @param codePoint the character (Unicode code point) to be tested. 11434 * @return {@code true} if the character is a Java whitespace 11435 * character; {@code false} otherwise. 
11436 * @see Character#isSpaceChar(int) 11437 * @since 1.5 11438 */ 11439 public static boolean isWhitespace(int codePoint) { 11440 return CharacterData.of(codePoint).isWhitespace(codePoint); 11441 } 11442 11443 /** 11444 * Determines if the specified character is an ISO control 11445 * character. A character is considered to be an ISO control 11446 * character if its code is in the range {@code '\u005Cu0000'} 11447 * through {@code '\u005Cu001F'} or in the range 11448 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11449 * 11450 * <p><b>Note:</b> This method cannot handle <a 11451 * href="#supplementary"> supplementary characters</a>. To support 11452 * all Unicode characters, including supplementary characters, use 11453 * the {@link #isISOControl(int)} method. 11454 * 11455 * @param ch the character to be tested. 11456 * @return {@code true} if the character is an ISO control character; 11457 * {@code false} otherwise. 11458 * 11459 * @see Character#isSpaceChar(char) 11460 * @see Character#isWhitespace(char) 11461 * @since 1.1 11462 */ 11463 public static boolean isISOControl(char ch) { 11464 return isISOControl((int)ch); 11465 } 11466 11467 /** 11468 * Determines if the referenced character (Unicode code point) is an ISO control 11469 * character. A character is considered to be an ISO control 11470 * character if its code is in the range {@code '\u005Cu0000'} 11471 * through {@code '\u005Cu001F'} or in the range 11472 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11473 * 11474 * @param codePoint the character (Unicode code point) to be tested. 11475 * @return {@code true} if the character is an ISO control character; 11476 * {@code false} otherwise. 
11477 * @see Character#isSpaceChar(int) 11478 * @see Character#isWhitespace(int) 11479 * @since 1.5 11480 */ 11481 public static boolean isISOControl(int codePoint) { 11482 // Optimized form of: 11483 // (codePoint >= 0x00 && codePoint <= 0x1F) || 11484 // (codePoint >= 0x7F && codePoint <= 0x9F); 11485 return codePoint <= 0x9F && 11486 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 11487 } 11488 11489 /** 11490 * Returns a value indicating a character's general category. 11491 * 11492 * <p><b>Note:</b> This method cannot handle <a 11493 * href="#supplementary"> supplementary characters</a>. To support 11494 * all Unicode characters, including supplementary characters, use 11495 * the {@link #getType(int)} method. 11496 * 11497 * @param ch the character to be tested. 11498 * @return a value of type {@code int} representing the 11499 * character's general category. 11500 * @see Character#COMBINING_SPACING_MARK 11501 * @see Character#CONNECTOR_PUNCTUATION 11502 * @see Character#CONTROL 11503 * @see Character#CURRENCY_SYMBOL 11504 * @see Character#DASH_PUNCTUATION 11505 * @see Character#DECIMAL_DIGIT_NUMBER 11506 * @see Character#ENCLOSING_MARK 11507 * @see Character#END_PUNCTUATION 11508 * @see Character#FINAL_QUOTE_PUNCTUATION 11509 * @see Character#FORMAT 11510 * @see Character#INITIAL_QUOTE_PUNCTUATION 11511 * @see Character#LETTER_NUMBER 11512 * @see Character#LINE_SEPARATOR 11513 * @see Character#LOWERCASE_LETTER 11514 * @see Character#MATH_SYMBOL 11515 * @see Character#MODIFIER_LETTER 11516 * @see Character#MODIFIER_SYMBOL 11517 * @see Character#NON_SPACING_MARK 11518 * @see Character#OTHER_LETTER 11519 * @see Character#OTHER_NUMBER 11520 * @see Character#OTHER_PUNCTUATION 11521 * @see Character#OTHER_SYMBOL 11522 * @see Character#PARAGRAPH_SEPARATOR 11523 * @see Character#PRIVATE_USE 11524 * @see Character#SPACE_SEPARATOR 11525 * @see Character#START_PUNCTUATION 11526 * @see Character#SURROGATE 11527 * @see Character#TITLECASE_LETTER 11528 * @see 
Character#UNASSIGNED 11529 * @see Character#UPPERCASE_LETTER 11530 * @since 1.1 11531 */ 11532 public static int getType(char ch) { 11533 return getType((int)ch); 11534 } 11535 11536 /** 11537 * Returns a value indicating a character's general category. 11538 * 11539 * @param codePoint the character (Unicode code point) to be tested. 11540 * @return a value of type {@code int} representing the 11541 * character's general category. 11542 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 11543 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 11544 * @see Character#CONTROL CONTROL 11545 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 11546 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 11547 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 11548 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 11549 * @see Character#END_PUNCTUATION END_PUNCTUATION 11550 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 11551 * @see Character#FORMAT FORMAT 11552 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 11553 * @see Character#LETTER_NUMBER LETTER_NUMBER 11554 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 11555 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 11556 * @see Character#MATH_SYMBOL MATH_SYMBOL 11557 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 11558 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 11559 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 11560 * @see Character#OTHER_LETTER OTHER_LETTER 11561 * @see Character#OTHER_NUMBER OTHER_NUMBER 11562 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 11563 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 11564 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 11565 * @see Character#PRIVATE_USE PRIVATE_USE 11566 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 11567 * @see Character#START_PUNCTUATION START_PUNCTUATION 11568 * @see Character#SURROGATE SURROGATE 11569 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 11570 * @see 
Character#UNASSIGNED UNASSIGNED 11571 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 11572 * @since 1.5 11573 */ 11574 public static int getType(int codePoint) { 11575 return CharacterData.of(codePoint).getType(codePoint); 11576 } 11577 11578 /** 11579 * Determines the character representation for a specific digit in 11580 * the specified radix. If the value of {@code radix} is not a 11581 * valid radix, or the value of {@code digit} is not a valid 11582 * digit in the specified radix, the null character 11583 * ({@code '\u005Cu0000'}) is returned. 11584 * <p> 11585 * The {@code radix} argument is valid if it is greater than or 11586 * equal to {@code MIN_RADIX} and less than or equal to 11587 * {@code MAX_RADIX}. The {@code digit} argument is valid if 11588 * {@code 0 <= digit < radix}. 11589 * <p> 11590 * If the digit is less than 10, then 11591 * {@code '0' + digit} is returned. Otherwise, the value 11592 * {@code 'a' + digit - 10} is returned. 11593 * 11594 * @param digit the number to convert to a character. 11595 * @param radix the radix. 11596 * @return the {@code char} representation of the specified digit 11597 * in the specified radix. 11598 * @see Character#MIN_RADIX 11599 * @see Character#MAX_RADIX 11600 * @see Character#digit(char, int) 11601 */ 11602 public static char forDigit(int digit, int radix) { 11603 if ((digit >= radix) || (digit < 0)) { 11604 return '\0'; 11605 } 11606 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 11607 return '\0'; 11608 } 11609 if (digit < 10) { 11610 return (char)('0' + digit); 11611 } 11612 return (char)('a' - 10 + digit); 11613 } 11614 11615 /** 11616 * Returns the Unicode directionality property for the given 11617 * character. Character directionality is used to calculate the 11618 * visual ordering of text. The directionality value of undefined 11619 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 
11620 * 11621 * <p><b>Note:</b> This method cannot handle <a 11622 * href="#supplementary"> supplementary characters</a>. To support 11623 * all Unicode characters, including supplementary characters, use 11624 * the {@link #getDirectionality(int)} method. 11625 * 11626 * @param ch {@code char} for which the directionality property 11627 * is requested. 11628 * @return the directionality property of the {@code char} value. 11629 * 11630 * @see Character#DIRECTIONALITY_UNDEFINED 11631 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 11632 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 11633 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11634 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 11635 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11636 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11637 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 11638 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11639 * @see Character#DIRECTIONALITY_NONSPACING_MARK 11640 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 11641 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 11642 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 11643 * @see Character#DIRECTIONALITY_WHITESPACE 11644 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 11645 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11646 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11647 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11648 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11649 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11650 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11651 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11652 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE 11653 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11654 * @since 1.4 11655 */ 11656 public static byte getDirectionality(char ch) { 11657 return getDirectionality((int)ch); 11658 } 11659 11660 /** 11661 * Returns the Unicode 
directionality property for the given 11662 * character (Unicode code point). Character directionality is 11663 * used to calculate the visual ordering of text. The 11664 * directionality value of undefined character is {@link 11665 * #DIRECTIONALITY_UNDEFINED}. 11666 * 11667 * @param codePoint the character (Unicode code point) for which 11668 * the directionality property is requested. 11669 * @return the directionality property of the character. 11670 * 11671 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 11672 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 11673 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 11674 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11675 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 11676 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11677 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11678 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 11679 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11680 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 11681 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 11682 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 11683 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 11684 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 11685 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 11686 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11687 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11688 * @see 
Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11689 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11690 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11691 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11692 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11693 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE 11694 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11695 * @since 1.5 11696 */ 11697 public static byte getDirectionality(int codePoint) { 11698 return CharacterData.of(codePoint).getDirectionality(codePoint); 11699 } 11700 11701 /** 11702 * Determines whether the character is mirrored according to the 11703 * Unicode specification. Mirrored characters should have their 11704 * glyphs horizontally mirrored when displayed in text that is 11705 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 11706 * PARENTHESIS is semantically defined to be an <i>opening 11707 * parenthesis</i>. This will appear as a "(" in text that is 11708 * left-to-right but as a ")" in text that is right-to-left. 11709 * 11710 * <p><b>Note:</b> This method cannot handle <a 11711 * href="#supplementary"> supplementary characters</a>. To support 11712 * all Unicode characters, including supplementary characters, use 11713 * the {@link #isMirrored(int)} method. 11714 * 11715 * @param ch {@code char} for which the mirrored property is requested 11716 * @return {@code true} if the char is mirrored, {@code false} 11717 * if the {@code char} is not mirrored or is not defined. 
11718 * @since 1.4 11719 */ 11720 public static boolean isMirrored(char ch) { 11721 return isMirrored((int)ch); 11722 } 11723 11724 /** 11725 * Determines whether the specified character (Unicode code point) 11726 * is mirrored according to the Unicode specification. Mirrored 11727 * characters should have their glyphs horizontally mirrored when 11728 * displayed in text that is right-to-left. For example, 11729 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 11730 * defined to be an <i>opening parenthesis</i>. This will appear 11731 * as a "(" in text that is left-to-right but as a ")" in text 11732 * that is right-to-left. 11733 * 11734 * @param codePoint the character (Unicode code point) to be tested. 11735 * @return {@code true} if the character is mirrored, {@code false} 11736 * if the character is not mirrored or is not defined. 11737 * @since 1.5 11738 */ 11739 public static boolean isMirrored(int codePoint) { 11740 return CharacterData.of(codePoint).isMirrored(codePoint); 11741 } 11742 11743 /** 11744 * Compares two {@code Character} objects numerically. 11745 * 11746 * @param anotherCharacter the {@code Character} to be compared. 11747 * @return the value {@code 0} if the argument {@code Character} 11748 * is equal to this {@code Character}; a value less than 11749 * {@code 0} if this {@code Character} is numerically less 11750 * than the {@code Character} argument; and a value greater than 11751 * {@code 0} if this {@code Character} is numerically greater 11752 * than the {@code Character} argument (unsigned comparison). 11753 * Note that this is strictly a numerical comparison; it is not 11754 * locale-dependent. 11755 * @since 1.2 11756 */ 11757 public int compareTo(Character anotherCharacter) { 11758 return compare(this.value, anotherCharacter.value); 11759 } 11760 11761 /** 11762 * Compares two {@code char} values numerically. 
11763 * The value returned is identical to what would be returned by: 11764 * <pre> 11765 * Character.valueOf(x).compareTo(Character.valueOf(y)) 11766 * </pre> 11767 * 11768 * @param x the first {@code char} to compare 11769 * @param y the second {@code char} to compare 11770 * @return the value {@code 0} if {@code x == y}; 11771 * a value less than {@code 0} if {@code x < y}; and 11772 * a value greater than {@code 0} if {@code x > y} 11773 * @since 1.7 11774 */ 11775 public static int compare(char x, char y) { 11776 return x - y; 11777 } 11778 11779 /** 11780 * Converts the character (Unicode code point) argument to uppercase using 11781 * information from the UnicodeData file. 11782 * 11783 * @param codePoint the character (Unicode code point) to be converted. 11784 * @return either the uppercase equivalent of the character, if 11785 * any, or an error flag ({@code Character.ERROR}) 11786 * that indicates that a 1:M {@code char} mapping exists. 11787 * @see Character#isLowerCase(char) 11788 * @see Character#isUpperCase(char) 11789 * @see Character#toLowerCase(char) 11790 * @see Character#toTitleCase(char) 11791 * @since 1.4 11792 */ 11793 static int toUpperCaseEx(int codePoint) { 11794 assert isValidCodePoint(codePoint); 11795 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 11796 } 11797 11798 /** 11799 * Converts the character (Unicode code point) argument to uppercase using case 11800 * mapping information from the SpecialCasing file in the Unicode 11801 * specification. If a character has no explicit uppercase 11802 * mapping, then the {@code char} itself is returned in the 11803 * {@code char[]}. 11804 * 11805 * @param codePoint the character (Unicode code point) to be converted. 11806 * @return a {@code char[]} with the uppercased character. 11807 * @since 1.4 11808 */ 11809 static char[] toUpperCaseCharArray(int codePoint) { 11810 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 
11811 assert isBmpCodePoint(codePoint); 11812 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 11813 } 11814 11815 /** 11816 * The number of bits used to represent a {@code char} value in unsigned 11817 * binary form, constant {@code 16}. 11818 * 11819 * @since 1.5 11820 */ 11821 public static final int SIZE = 16; 11822 11823 /** 11824 * The number of bytes used to represent a {@code char} value in unsigned 11825 * binary form. 11826 * 11827 * @since 1.8 11828 */ 11829 public static final int BYTES = SIZE / Byte.SIZE; 11830 11831 /** 11832 * Returns the value obtained by reversing the order of the bytes in the 11833 * specified {@code char} value. 11834 * 11835 * @param ch The {@code char} of which to reverse the byte order. 11836 * @return the value obtained by reversing (or, equivalently, swapping) 11837 * the bytes in the specified {@code char} value. 11838 * @since 1.5 11839 */ 11840 @IntrinsicCandidate 11841 public static char reverseBytes(char ch) { 11842 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 11843 } 11844 11845 /** 11846 * Returns the name of the specified character 11847 * {@code codePoint}, or null if the code point is 11848 * {@link #UNASSIGNED unassigned}. 11849 * <p> 11850 * If the specified character is not assigned a name by 11851 * the <i>UnicodeData</i> file (part of the Unicode Character 11852 * Database maintained by the Unicode Consortium), the returned 11853 * name is the same as the result of the expression: 11854 * 11855 * <blockquote>{@code 11856 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 11857 * + " " 11858 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11859 * 11860 * }</blockquote> 11861 * 11862 * For the {@code codePoint}s in the <i>UnicodeData</i> file, the name 11863 * returned by this method follows the naming scheme in the 11864 * "Unicode Name Property" section of the Unicode Standard. 
For other 11865 * code points, such as Hangul/Ideographs, The name generation rule above 11866 * differs from the one defined in the Unicode Standard. 11867 * 11868 * @param codePoint the character (Unicode code point) 11869 * 11870 * @return the name of the specified character, or null if 11871 * the code point is unassigned. 11872 * 11873 * @throws IllegalArgumentException if the specified 11874 * {@code codePoint} is not a valid Unicode 11875 * code point. 11876 * 11877 * @since 1.7 11878 */ 11879 public static String getName(int codePoint) { 11880 if (!isValidCodePoint(codePoint)) { 11881 throw new IllegalArgumentException( 11882 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 11883 } 11884 String name = CharacterName.getInstance().getName(codePoint); 11885 if (name != null) 11886 return name; 11887 if (getType(codePoint) == UNASSIGNED) 11888 return null; 11889 UnicodeBlock block = UnicodeBlock.of(codePoint); 11890 if (block != null) 11891 return block.toString().replace('_', ' ') + " " 11892 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11893 // should never come here 11894 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11895 } 11896 11897 /** 11898 * Returns the code point value of the Unicode character specified by 11899 * the given character name. 11900 * <p> 11901 * If a character is not assigned a name by the <i>UnicodeData</i> 11902 * file (part of the Unicode Character Database maintained by the Unicode 11903 * Consortium), its name is defined as the result of the expression: 11904 * 11905 * <blockquote>{@code 11906 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 11907 * + " " 11908 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11909 * 11910 * }</blockquote> 11911 * <p> 11912 * The {@code name} matching is case insensitive, with any leading and 11913 * trailing whitespace character removed. 
11914 * 11915 * For the code points in the <i>UnicodeData</i> file, this method 11916 * recognizes the name which conforms to the name defined in the 11917 * "Unicode Name Property" section in the Unicode Standard. For other 11918 * code points, this method recognizes the name generated with 11919 * {@link #getName(int)} method. 11920 * 11921 * @param name the character name 11922 * 11923 * @return the code point value of the character specified by its name. 11924 * 11925 * @throws IllegalArgumentException if the specified {@code name} 11926 * is not a valid character name. 11927 * @throws NullPointerException if {@code name} is {@code null} 11928 * 11929 * @since 9 11930 */ 11931 public static int codePointOf(String name) { 11932 name = name.trim().toUpperCase(Locale.ROOT); 11933 int cp = CharacterName.getInstance().getCodePoint(name); 11934 if (cp != -1) 11935 return cp; 11936 try { 11937 int off = name.lastIndexOf(' '); 11938 if (off != -1) { 11939 cp = Integer.parseInt(name, off + 1, name.length(), 16); 11940 if (isValidCodePoint(cp) && name.equals(getName(cp))) 11941 return cp; 11942 } 11943 } catch (Exception x) {} 11944 throw new IllegalArgumentException("Unrecognized character name :" + name); 11945 } 11946 }