1 /* 2 * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26 package java.lang; 27 28 import jdk.internal.misc.CDS; 29 import jdk.internal.vm.annotation.IntrinsicCandidate; 30 import jdk.internal.vm.annotation.Stable; 31 32 import java.lang.constant.Constable; 33 import java.lang.constant.DynamicConstantDesc; 34 import java.util.Arrays; 35 import java.util.HashMap; 36 import java.util.Locale; 37 import java.util.Map; 38 import java.util.Objects; 39 import java.util.Optional; 40 41 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST; 42 import static java.lang.constant.ConstantDescs.CD_char; 43 import static java.lang.constant.ConstantDescs.DEFAULT_NAME; 44 45 /** 46 * The {@code Character} class is the {@linkplain 47 * java.lang##wrapperClass wrapper class} for values of the primitive 48 * type {@code char}. An object of type {@code Character} contains a 49 * single field whose type is {@code char}. 50 * 51 * <p>In addition, this class provides a large number of static methods for 52 * determining a character's category (lowercase letter, digit, etc.) 53 * and for converting characters from uppercase to lowercase and vice 54 * versa. 55 * 56 * <h2><a id="conformance">Unicode Conformance</a></h2> 57 * <p> 58 * The fields and methods of class {@code Character} are defined in terms 59 * of character information from the Unicode Standard, specifically the 60 * <i>UnicodeData</i> file that is part of the Unicode Character Database. 61 * This file specifies properties including name and category for every 62 * assigned Unicode code point or character range. The file is available 63 * from the Unicode Consortium at 64 * <a href="http://www.unicode.org">http://www.unicode.org</a>. 65 * <p> 66 * Character information is based on the Unicode Standard, version 16.0. 67 * <p> 68 * The Java platform has supported different versions of the Unicode 69 * Standard over time. 
Upgrades to newer versions of the Unicode Standard 70 * occurred in the following Java releases, each indicating the new version: 71 * <table class="striped"> 72 * <caption style="display:none">Shows Java releases and supported Unicode versions</caption> 73 * <thead> 74 * <tr><th scope="col">Java release</th> 75 * <th scope="col">Unicode version</th></tr> 76 * </thead> 77 * <tbody> 78 * <tr><th scope="row" style="text-align:left">Java SE 24</th> 79 * <td>Unicode 16.0</td></tr> 80 * <tr><th scope="row" style="text-align:left">Java SE 22</th> 81 * <td>Unicode 15.1</td></tr> 82 * <tr><th scope="row" style="text-align:left">Java SE 20</th> 83 * <td>Unicode 15.0</td></tr> 84 * <tr><th scope="row" style="text-align:left">Java SE 19</th> 85 * <td>Unicode 14.0</td></tr> 86 * <tr><th scope="row" style="text-align:left">Java SE 15</th> 87 * <td>Unicode 13.0</td></tr> 88 * <tr><th scope="row" style="text-align:left">Java SE 13</th> 89 * <td>Unicode 12.1</td></tr> 90 * <tr><th scope="row" style="text-align:left">Java SE 12</th> 91 * <td>Unicode 11.0</td></tr> 92 * <tr><th scope="row" style="text-align:left">Java SE 11</th> 93 * <td>Unicode 10.0</td></tr> 94 * <tr><th scope="row" style="text-align:left">Java SE 9</th> 95 * <td>Unicode 8.0</td></tr> 96 * <tr><th scope="row" style="text-align:left">Java SE 8</th> 97 * <td>Unicode 6.2</td></tr> 98 * <tr><th scope="row" style="text-align:left">Java SE 7</th> 99 * <td>Unicode 6.0</td></tr> 100 * <tr><th scope="row" style="text-align:left">Java SE 5.0</th> 101 * <td>Unicode 4.0</td></tr> 102 * <tr><th scope="row" style="text-align:left">Java SE 1.4</th> 103 * <td>Unicode 3.0</td></tr> 104 * <tr><th scope="row" style="text-align:left">JDK 1.1</th> 105 * <td>Unicode 2.0</td></tr> 106 * <tr><th scope="row" style="text-align:left">JDK 1.0.2</th> 107 * <td>Unicode 1.1.5</td></tr> 108 * </tbody> 109 * </table> 110 * Variations from these base Unicode versions, such as recognized appendixes, 111 * are documented elsewhere. 
* <h2><a id="unicode">Unicode Character Representations</a></h2>
 *
 * <p>The {@code char} data type (and therefore the value that a
 * {@code Character} object encapsulates) is based on the
 * original Unicode specification, which defined characters as
 * fixed-width 16-bit entities. The Unicode Standard has since been
 * changed to allow for characters whose representation requires more
 * than 16 bits. The range of legal <em>code point</em>s is now
 * U+0000 to U+10FFFF, known as
 * <em><a href="https://www.unicode.org/glossary/#unicode_scalar_value">
 * Unicode scalar value</a></em>.
 *
 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is
 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
 * <a id="supplementary">Characters</a> whose code points are greater
 * than U+FFFF are called <em>supplementary character</em>s. The Java
 * platform uses the UTF-16 representation in {@code char} arrays and
 * in the {@code String} and {@code StringBuffer} classes. In
 * this representation, supplementary characters are represented as a pair
 * of {@code char} values, the first from the <em>high-surrogates</em>
 * range, (\u005CuD800-\u005CuDBFF), the second from the
 * <em>low-surrogates</em> range (\u005CuDC00-\u005CuDFFF).
 *
 * <p>A {@code char} value, therefore, represents Basic
 * Multilingual Plane (BMP) code points, including the surrogate
 * code points, or code units of the UTF-16 encoding. An
 * {@code int} value represents all Unicode code points,
 * including supplementary code points. The lower (least significant)
 * 21 bits of {@code int} are used to represent Unicode code
 * points and the upper (most significant) 11 bits must be zero.
142 * Unless otherwise specified, the behavior with respect to 143 * supplementary characters and surrogate {@code char} values is 144 * as follows: 145 * 146 * <ul> 147 * <li>The methods that only accept a {@code char} value cannot support 148 * supplementary characters. They treat {@code char} values from the 149 * surrogate ranges as undefined characters. For example, 150 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 151 * this specific value if followed by any low-surrogate value in a string 152 * would represent a letter. 153 * 154 * <li>The methods that accept an {@code int} value support all 155 * Unicode characters, including supplementary characters. For 156 * example, {@code Character.isLetter(0x2F81A)} returns 157 * {@code true} because the code point value represents a letter 158 * (a CJK ideograph). 159 * </ul> 160 * 161 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 162 * used for character values in the range between U+0000 and U+10FFFF, 163 * and <em>Unicode code unit</em> is used for 16-bit 164 * {@code char} values that are code units of the <em>UTF-16</em> 165 * encoding. For more information on Unicode terminology, refer to the 166 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 167 * 168 * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a> 169 * class; programmers should treat instances that are 170 * {@linkplain #equals(Object) equal} as interchangeable and should not 171 * use instances for synchronization, or unpredictable behavior may 172 * occur. For example, in a future release, synchronization may fail. 
173 * 174 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 175 * @author Lee Boynton 176 * @author Guy Steele 177 * @author Akira Tanaka 178 * @author Martin Buchholz 179 * @author Ulf Zibis 180 * @since 1.0 181 */ 182 @jdk.internal.ValueBased 183 public final 184 class Character implements java.io.Serializable, Comparable<Character>, Constable { 185 /** 186 * The minimum radix available for conversion to and from strings. 187 * The constant value of this field is the smallest value permitted 188 * for the radix argument in radix-conversion methods such as the 189 * {@code digit} method, the {@code forDigit} method, and the 190 * {@code toString} method of class {@code Integer}. 191 * 192 * @see Character#digit(char, int) 193 * @see Character#forDigit(int, int) 194 * @see Integer#toString(int, int) 195 * @see Integer#valueOf(String) 196 */ 197 public static final int MIN_RADIX = 2; 198 199 /** 200 * The maximum radix available for conversion to and from strings. 201 * The constant value of this field is the largest value permitted 202 * for the radix argument in radix-conversion methods such as the 203 * {@code digit} method, the {@code forDigit} method, and the 204 * {@code toString} method of class {@code Integer}. 205 * 206 * @see Character#digit(char, int) 207 * @see Character#forDigit(int, int) 208 * @see Integer#toString(int, int) 209 * @see Integer#valueOf(String) 210 */ 211 public static final int MAX_RADIX = 36; 212 213 /** 214 * The constant value of this field is the smallest value of type 215 * {@code char}, {@code '\u005Cu0000'}. 216 * 217 * @since 1.0.2 218 */ 219 public static final char MIN_VALUE = '\u0000'; 220 221 /** 222 * The constant value of this field is the largest value of type 223 * {@code char}, {@code '\u005CuFFFF'}. 224 * 225 * @since 1.0.2 226 */ 227 public static final char MAX_VALUE = '\uFFFF'; 228 229 /** 230 * The {@code Class} instance representing the primitive type 231 * {@code char}. 
232 * 233 * @since 1.1 234 */ 235 public static final Class<Character> TYPE = Class.getPrimitiveClass("char"); 236 237 /* 238 * Normative general types 239 */ 240 241 /* 242 * General character types 243 */ 244 245 /** 246 * General category "Cn" in the Unicode specification. 247 * @since 1.1 248 */ 249 public static final byte UNASSIGNED = 0; 250 251 /** 252 * General category "Lu" in the Unicode specification. 253 * @since 1.1 254 */ 255 public static final byte UPPERCASE_LETTER = 1; 256 257 /** 258 * General category "Ll" in the Unicode specification. 259 * @since 1.1 260 */ 261 public static final byte LOWERCASE_LETTER = 2; 262 263 /** 264 * General category "Lt" in the Unicode specification. 265 * @since 1.1 266 */ 267 public static final byte TITLECASE_LETTER = 3; 268 269 /** 270 * General category "Lm" in the Unicode specification. 271 * @since 1.1 272 */ 273 public static final byte MODIFIER_LETTER = 4; 274 275 /** 276 * General category "Lo" in the Unicode specification. 277 * @since 1.1 278 */ 279 public static final byte OTHER_LETTER = 5; 280 281 /** 282 * General category "Mn" in the Unicode specification. 283 * @since 1.1 284 */ 285 public static final byte NON_SPACING_MARK = 6; 286 287 /** 288 * General category "Me" in the Unicode specification. 289 * @since 1.1 290 */ 291 public static final byte ENCLOSING_MARK = 7; 292 293 /** 294 * General category "Mc" in the Unicode specification. 295 * @since 1.1 296 */ 297 public static final byte COMBINING_SPACING_MARK = 8; 298 299 /** 300 * General category "Nd" in the Unicode specification. 301 * @since 1.1 302 */ 303 public static final byte DECIMAL_DIGIT_NUMBER = 9; 304 305 /** 306 * General category "Nl" in the Unicode specification. 307 * @since 1.1 308 */ 309 public static final byte LETTER_NUMBER = 10; 310 311 /** 312 * General category "No" in the Unicode specification. 
313 * @since 1.1 314 */ 315 public static final byte OTHER_NUMBER = 11; 316 317 /** 318 * General category "Zs" in the Unicode specification. 319 * @since 1.1 320 */ 321 public static final byte SPACE_SEPARATOR = 12; 322 323 /** 324 * General category "Zl" in the Unicode specification. 325 * @since 1.1 326 */ 327 public static final byte LINE_SEPARATOR = 13; 328 329 /** 330 * General category "Zp" in the Unicode specification. 331 * @since 1.1 332 */ 333 public static final byte PARAGRAPH_SEPARATOR = 14; 334 335 /** 336 * General category "Cc" in the Unicode specification. 337 * @since 1.1 338 */ 339 public static final byte CONTROL = 15; 340 341 /** 342 * General category "Cf" in the Unicode specification. 343 * @since 1.1 344 */ 345 public static final byte FORMAT = 16; 346 347 /** 348 * General category "Co" in the Unicode specification. 349 * @since 1.1 350 */ 351 public static final byte PRIVATE_USE = 18; 352 353 /** 354 * General category "Cs" in the Unicode specification. 355 * @since 1.1 356 */ 357 public static final byte SURROGATE = 19; 358 359 /** 360 * General category "Pd" in the Unicode specification. 361 * @since 1.1 362 */ 363 public static final byte DASH_PUNCTUATION = 20; 364 365 /** 366 * General category "Ps" in the Unicode specification. 367 * @since 1.1 368 */ 369 public static final byte START_PUNCTUATION = 21; 370 371 /** 372 * General category "Pe" in the Unicode specification. 373 * @since 1.1 374 */ 375 public static final byte END_PUNCTUATION = 22; 376 377 /** 378 * General category "Pc" in the Unicode specification. 379 * @since 1.1 380 */ 381 public static final byte CONNECTOR_PUNCTUATION = 23; 382 383 /** 384 * General category "Po" in the Unicode specification. 385 * @since 1.1 386 */ 387 public static final byte OTHER_PUNCTUATION = 24; 388 389 /** 390 * General category "Sm" in the Unicode specification. 
391 * @since 1.1 392 */ 393 public static final byte MATH_SYMBOL = 25; 394 395 /** 396 * General category "Sc" in the Unicode specification. 397 * @since 1.1 398 */ 399 public static final byte CURRENCY_SYMBOL = 26; 400 401 /** 402 * General category "Sk" in the Unicode specification. 403 * @since 1.1 404 */ 405 public static final byte MODIFIER_SYMBOL = 27; 406 407 /** 408 * General category "So" in the Unicode specification. 409 * @since 1.1 410 */ 411 public static final byte OTHER_SYMBOL = 28; 412 413 /** 414 * General category "Pi" in the Unicode specification. 415 * @since 1.4 416 */ 417 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 418 419 /** 420 * General category "Pf" in the Unicode specification. 421 * @since 1.4 422 */ 423 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 424 425 /** 426 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 427 */ 428 static final int ERROR = 0xFFFFFFFF; 429 430 431 /** 432 * Undefined bidirectional character type. Undefined {@code char} 433 * values have undefined directionality in the Unicode specification. 434 * @since 1.4 435 */ 436 public static final byte DIRECTIONALITY_UNDEFINED = -1; 437 438 /** 439 * Strong bidirectional character type "L" in the Unicode specification. 440 * @since 1.4 441 */ 442 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 443 444 /** 445 * Strong bidirectional character type "R" in the Unicode specification. 446 * @since 1.4 447 */ 448 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 449 450 /** 451 * Strong bidirectional character type "AL" in the Unicode specification. 452 * @since 1.4 453 */ 454 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 455 456 /** 457 * Weak bidirectional character type "EN" in the Unicode specification. 458 * @since 1.4 459 */ 460 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 461 462 /** 463 * Weak bidirectional character type "ES" in the Unicode specification. 
464 * @since 1.4 465 */ 466 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 467 468 /** 469 * Weak bidirectional character type "ET" in the Unicode specification. 470 * @since 1.4 471 */ 472 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 473 474 /** 475 * Weak bidirectional character type "AN" in the Unicode specification. 476 * @since 1.4 477 */ 478 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 479 480 /** 481 * Weak bidirectional character type "CS" in the Unicode specification. 482 * @since 1.4 483 */ 484 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 485 486 /** 487 * Weak bidirectional character type "NSM" in the Unicode specification. 488 * @since 1.4 489 */ 490 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 491 492 /** 493 * Weak bidirectional character type "BN" in the Unicode specification. 494 * @since 1.4 495 */ 496 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 497 498 /** 499 * Neutral bidirectional character type "B" in the Unicode specification. 500 * @since 1.4 501 */ 502 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 503 504 /** 505 * Neutral bidirectional character type "S" in the Unicode specification. 506 * @since 1.4 507 */ 508 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 509 510 /** 511 * Neutral bidirectional character type "WS" in the Unicode specification. 512 * @since 1.4 513 */ 514 public static final byte DIRECTIONALITY_WHITESPACE = 12; 515 516 /** 517 * Neutral bidirectional character type "ON" in the Unicode specification. 518 * @since 1.4 519 */ 520 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 521 522 /** 523 * Strong bidirectional character type "LRE" in the Unicode specification. 524 * @since 1.4 525 */ 526 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 527 528 /** 529 * Strong bidirectional character type "LRO" in the Unicode specification. 
530 * @since 1.4 531 */ 532 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 533 534 /** 535 * Strong bidirectional character type "RLE" in the Unicode specification. 536 * @since 1.4 537 */ 538 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 539 540 /** 541 * Strong bidirectional character type "RLO" in the Unicode specification. 542 * @since 1.4 543 */ 544 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 545 546 /** 547 * Weak bidirectional character type "PDF" in the Unicode specification. 548 * @since 1.4 549 */ 550 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 551 552 /** 553 * Weak bidirectional character type "LRI" in the Unicode specification. 554 * @since 9 555 */ 556 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19; 557 558 /** 559 * Weak bidirectional character type "RLI" in the Unicode specification. 560 * @since 9 561 */ 562 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20; 563 564 /** 565 * Weak bidirectional character type "FSI" in the Unicode specification. 566 * @since 9 567 */ 568 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21; 569 570 /** 571 * Weak bidirectional character type "PDI" in the Unicode specification. 572 * @since 9 573 */ 574 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22; 575 576 /** 577 * The minimum value of a 578 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 579 * Unicode high-surrogate code unit</a> 580 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 581 * A high-surrogate is also known as a <i>leading-surrogate</i>. 582 * 583 * @since 1.5 584 */ 585 public static final char MIN_HIGH_SURROGATE = '\uD800'; 586 587 /** 588 * The maximum value of a 589 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 590 * Unicode high-surrogate code unit</a> 591 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 
592 * A high-surrogate is also known as a <i>leading-surrogate</i>. 593 * 594 * @since 1.5 595 */ 596 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 597 598 /** 599 * The minimum value of a 600 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 601 * Unicode low-surrogate code unit</a> 602 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 603 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 604 * 605 * @since 1.5 606 */ 607 public static final char MIN_LOW_SURROGATE = '\uDC00'; 608 609 /** 610 * The maximum value of a 611 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 612 * Unicode low-surrogate code unit</a> 613 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 614 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 615 * 616 * @since 1.5 617 */ 618 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 619 620 /** 621 * The minimum value of a Unicode surrogate code unit in the 622 * UTF-16 encoding, constant {@code '\u005CuD800'}. 623 * 624 * @since 1.5 625 */ 626 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 627 628 /** 629 * The maximum value of a Unicode surrogate code unit in the 630 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 631 * 632 * @since 1.5 633 */ 634 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 635 636 /** 637 * The minimum value of a 638 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 639 * Unicode supplementary code point</a>, constant {@code U+10000}. 640 * 641 * @since 1.5 642 */ 643 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 644 645 /** 646 * The minimum value of a 647 * <a href="http://www.unicode.org/glossary/#code_point"> 648 * Unicode code point</a>, constant {@code U+0000}. 
649 * 650 * @since 1.5 651 */ 652 public static final int MIN_CODE_POINT = 0x000000; 653 654 /** 655 * The maximum value of a 656 * <a href="http://www.unicode.org/glossary/#code_point"> 657 * Unicode code point</a>, constant {@code U+10FFFF}. 658 * 659 * @since 1.5 660 */ 661 public static final int MAX_CODE_POINT = 0X10FFFF; 662 663 /** 664 * Returns an {@link Optional} containing the nominal descriptor for this 665 * instance. 666 * 667 * @return an {@link Optional} describing the {@linkplain Character} instance 668 * @since 15 669 */ 670 @Override 671 public Optional<DynamicConstantDesc<Character>> describeConstable() { 672 return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value)); 673 } 674 675 /** 676 * Instances of this class represent particular subsets of the Unicode 677 * character set. The only family of subsets defined in the 678 * {@code Character} class is {@link Character.UnicodeBlock}. 679 * Other portions of the Java API may define other subsets for their 680 * own purposes. 681 * 682 * @since 1.2 683 */ 684 public static class Subset { 685 686 private String name; 687 688 /** 689 * Constructs a new {@code Subset} instance. 690 * 691 * @param name The name of this subset 692 * @throws NullPointerException if name is {@code null} 693 */ 694 protected Subset(String name) { 695 if (name == null) { 696 throw new NullPointerException("name"); 697 } 698 this.name = name; 699 } 700 701 /** 702 * Compares two {@code Subset} objects for equality. 703 * This method returns {@code true} if and only if 704 * {@code this} and the argument refer to the same 705 * object; since this method is {@code final}, this 706 * guarantee holds for all subclasses. 707 */ 708 public final boolean equals(Object obj) { 709 return (this == obj); 710 } 711 712 /** 713 * Returns the standard hash code as defined by the 714 * {@link Object#hashCode} method. 
This method 715 * is {@code final} in order to ensure that the 716 * {@code equals} and {@code hashCode} methods will 717 * be consistent in all subclasses. 718 */ 719 public final int hashCode() { 720 return super.hashCode(); 721 } 722 723 /** 724 * Returns the name of this subset. 725 */ 726 public final String toString() { 727 return name; 728 } 729 } 730 731 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 732 // for the latest specification of Unicode Blocks. 733 734 /** 735 * A family of character subsets representing the character blocks in the 736 * Unicode specification. Character blocks generally define characters 737 * used for a specific script or purpose. A character is contained by 738 * at most one Unicode block. 739 * 740 * @since 1.2 741 */ 742 public static final class UnicodeBlock extends Subset { 743 /** 744 * NUM_ENTITIES should match the total number of UnicodeBlocks. 745 * It should be adjusted whenever the Unicode Character Database 746 * is upgraded. 747 */ 748 private static final int NUM_ENTITIES = 782; 749 private static Map<String, UnicodeBlock> map = HashMap.newHashMap(NUM_ENTITIES); 750 751 /** 752 * Creates a UnicodeBlock with the given identifier name. 753 * This name must be the same as the block identifier. 754 */ 755 private UnicodeBlock(String idName) { 756 super(idName); 757 map.put(idName, this); 758 } 759 760 /** 761 * Creates a UnicodeBlock with the given identifier name and 762 * alias name. 763 */ 764 private UnicodeBlock(String idName, String alias) { 765 this(idName); 766 map.put(alias, this); 767 } 768 769 /** 770 * Creates a UnicodeBlock with the given identifier name and 771 * alias names. 772 */ 773 private UnicodeBlock(String idName, String... aliases) { 774 this(idName); 775 for (String alias : aliases) 776 map.put(alias, this); 777 } 778 779 /** 780 * Constant for the "Basic Latin" Unicode character block. 
781 * @since 1.2 782 */ 783 public static final UnicodeBlock BASIC_LATIN = 784 new UnicodeBlock("BASIC_LATIN", 785 "BASIC LATIN", 786 "BASICLATIN"); 787 788 /** 789 * Constant for the "Latin-1 Supplement" Unicode character block. 790 * @since 1.2 791 */ 792 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 793 new UnicodeBlock("LATIN_1_SUPPLEMENT", 794 "LATIN-1 SUPPLEMENT", 795 "LATIN-1SUPPLEMENT"); 796 797 /** 798 * Constant for the "Latin Extended-A" Unicode character block. 799 * @since 1.2 800 */ 801 public static final UnicodeBlock LATIN_EXTENDED_A = 802 new UnicodeBlock("LATIN_EXTENDED_A", 803 "LATIN EXTENDED-A", 804 "LATINEXTENDED-A"); 805 806 /** 807 * Constant for the "Latin Extended-B" Unicode character block. 808 * @since 1.2 809 */ 810 public static final UnicodeBlock LATIN_EXTENDED_B = 811 new UnicodeBlock("LATIN_EXTENDED_B", 812 "LATIN EXTENDED-B", 813 "LATINEXTENDED-B"); 814 815 /** 816 * Constant for the "IPA Extensions" Unicode character block. 817 * @since 1.2 818 */ 819 public static final UnicodeBlock IPA_EXTENSIONS = 820 new UnicodeBlock("IPA_EXTENSIONS", 821 "IPA EXTENSIONS", 822 "IPAEXTENSIONS"); 823 824 /** 825 * Constant for the "Spacing Modifier Letters" Unicode character block. 826 * @since 1.2 827 */ 828 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 829 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 830 "SPACING MODIFIER LETTERS", 831 "SPACINGMODIFIERLETTERS"); 832 833 /** 834 * Constant for the "Combining Diacritical Marks" Unicode character block. 835 * @since 1.2 836 */ 837 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 838 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 839 "COMBINING DIACRITICAL MARKS", 840 "COMBININGDIACRITICALMARKS"); 841 842 /** 843 * Constant for the "Greek and Coptic" Unicode character block. 844 * <p> 845 * This block was previously known as the "Greek" block. 
846 * 847 * @since 1.2 848 */ 849 public static final UnicodeBlock GREEK = 850 new UnicodeBlock("GREEK", 851 "GREEK AND COPTIC", 852 "GREEKANDCOPTIC"); 853 854 /** 855 * Constant for the "Cyrillic" Unicode character block. 856 * @since 1.2 857 */ 858 public static final UnicodeBlock CYRILLIC = 859 new UnicodeBlock("CYRILLIC"); 860 861 /** 862 * Constant for the "Armenian" Unicode character block. 863 * @since 1.2 864 */ 865 public static final UnicodeBlock ARMENIAN = 866 new UnicodeBlock("ARMENIAN"); 867 868 /** 869 * Constant for the "Hebrew" Unicode character block. 870 * @since 1.2 871 */ 872 public static final UnicodeBlock HEBREW = 873 new UnicodeBlock("HEBREW"); 874 875 /** 876 * Constant for the "Arabic" Unicode character block. 877 * @since 1.2 878 */ 879 public static final UnicodeBlock ARABIC = 880 new UnicodeBlock("ARABIC"); 881 882 /** 883 * Constant for the "Devanagari" Unicode character block. 884 * @since 1.2 885 */ 886 public static final UnicodeBlock DEVANAGARI = 887 new UnicodeBlock("DEVANAGARI"); 888 889 /** 890 * Constant for the "Bengali" Unicode character block. 891 * @since 1.2 892 */ 893 public static final UnicodeBlock BENGALI = 894 new UnicodeBlock("BENGALI"); 895 896 /** 897 * Constant for the "Gurmukhi" Unicode character block. 898 * @since 1.2 899 */ 900 public static final UnicodeBlock GURMUKHI = 901 new UnicodeBlock("GURMUKHI"); 902 903 /** 904 * Constant for the "Gujarati" Unicode character block. 905 * @since 1.2 906 */ 907 public static final UnicodeBlock GUJARATI = 908 new UnicodeBlock("GUJARATI"); 909 910 /** 911 * Constant for the "Oriya" Unicode character block. 912 * @since 1.2 913 */ 914 public static final UnicodeBlock ORIYA = 915 new UnicodeBlock("ORIYA"); 916 917 /** 918 * Constant for the "Tamil" Unicode character block. 919 * @since 1.2 920 */ 921 public static final UnicodeBlock TAMIL = 922 new UnicodeBlock("TAMIL"); 923 924 /** 925 * Constant for the "Telugu" Unicode character block. 
926 * @since 1.2 927 */ 928 public static final UnicodeBlock TELUGU = 929 new UnicodeBlock("TELUGU"); 930 931 /** 932 * Constant for the "Kannada" Unicode character block. 933 * @since 1.2 934 */ 935 public static final UnicodeBlock KANNADA = 936 new UnicodeBlock("KANNADA"); 937 938 /** 939 * Constant for the "Malayalam" Unicode character block. 940 * @since 1.2 941 */ 942 public static final UnicodeBlock MALAYALAM = 943 new UnicodeBlock("MALAYALAM"); 944 945 /** 946 * Constant for the "Thai" Unicode character block. 947 * @since 1.2 948 */ 949 public static final UnicodeBlock THAI = 950 new UnicodeBlock("THAI"); 951 952 /** 953 * Constant for the "Lao" Unicode character block. 954 * @since 1.2 955 */ 956 public static final UnicodeBlock LAO = 957 new UnicodeBlock("LAO"); 958 959 /** 960 * Constant for the "Tibetan" Unicode character block. 961 * @since 1.2 962 */ 963 public static final UnicodeBlock TIBETAN = 964 new UnicodeBlock("TIBETAN"); 965 966 /** 967 * Constant for the "Georgian" Unicode character block. 968 * @since 1.2 969 */ 970 public static final UnicodeBlock GEORGIAN = 971 new UnicodeBlock("GEORGIAN"); 972 973 /** 974 * Constant for the "Hangul Jamo" Unicode character block. 975 * @since 1.2 976 */ 977 public static final UnicodeBlock HANGUL_JAMO = 978 new UnicodeBlock("HANGUL_JAMO", 979 "HANGUL JAMO", 980 "HANGULJAMO"); 981 982 /** 983 * Constant for the "Latin Extended Additional" Unicode character block. 984 * @since 1.2 985 */ 986 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 987 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 988 "LATIN EXTENDED ADDITIONAL", 989 "LATINEXTENDEDADDITIONAL"); 990 991 /** 992 * Constant for the "Greek Extended" Unicode character block. 993 * @since 1.2 994 */ 995 public static final UnicodeBlock GREEK_EXTENDED = 996 new UnicodeBlock("GREEK_EXTENDED", 997 "GREEK EXTENDED", 998 "GREEKEXTENDED"); 999 1000 /** 1001 * Constant for the "General Punctuation" Unicode character block. 
1002 * @since 1.2 1003 */ 1004 public static final UnicodeBlock GENERAL_PUNCTUATION = 1005 new UnicodeBlock("GENERAL_PUNCTUATION", 1006 "GENERAL PUNCTUATION", 1007 "GENERALPUNCTUATION"); 1008 1009 /** 1010 * Constant for the "Superscripts and Subscripts" Unicode character 1011 * block. 1012 * @since 1.2 1013 */ 1014 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 1015 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 1016 "SUPERSCRIPTS AND SUBSCRIPTS", 1017 "SUPERSCRIPTSANDSUBSCRIPTS"); 1018 1019 /** 1020 * Constant for the "Currency Symbols" Unicode character block. 1021 * @since 1.2 1022 */ 1023 public static final UnicodeBlock CURRENCY_SYMBOLS = 1024 new UnicodeBlock("CURRENCY_SYMBOLS", 1025 "CURRENCY SYMBOLS", 1026 "CURRENCYSYMBOLS"); 1027 1028 /** 1029 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 1030 * character block. 1031 * <p> 1032 * This block was previously known as "Combining Marks for Symbols". 1033 * @since 1.2 1034 */ 1035 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 1036 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 1037 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 1038 "COMBININGDIACRITICALMARKSFORSYMBOLS", 1039 "COMBINING MARKS FOR SYMBOLS", 1040 "COMBININGMARKSFORSYMBOLS"); 1041 1042 /** 1043 * Constant for the "Letterlike Symbols" Unicode character block. 1044 * @since 1.2 1045 */ 1046 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 1047 new UnicodeBlock("LETTERLIKE_SYMBOLS", 1048 "LETTERLIKE SYMBOLS", 1049 "LETTERLIKESYMBOLS"); 1050 1051 /** 1052 * Constant for the "Number Forms" Unicode character block. 1053 * @since 1.2 1054 */ 1055 public static final UnicodeBlock NUMBER_FORMS = 1056 new UnicodeBlock("NUMBER_FORMS", 1057 "NUMBER FORMS", 1058 "NUMBERFORMS"); 1059 1060 /** 1061 * Constant for the "Arrows" Unicode character block. 
1062 * @since 1.2 1063 */ 1064 public static final UnicodeBlock ARROWS = 1065 new UnicodeBlock("ARROWS"); 1066 1067 /** 1068 * Constant for the "Mathematical Operators" Unicode character block. 1069 * @since 1.2 1070 */ 1071 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 1072 new UnicodeBlock("MATHEMATICAL_OPERATORS", 1073 "MATHEMATICAL OPERATORS", 1074 "MATHEMATICALOPERATORS"); 1075 1076 /** 1077 * Constant for the "Miscellaneous Technical" Unicode character block. 1078 * @since 1.2 1079 */ 1080 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 1081 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 1082 "MISCELLANEOUS TECHNICAL", 1083 "MISCELLANEOUSTECHNICAL"); 1084 1085 /** 1086 * Constant for the "Control Pictures" Unicode character block. 1087 * @since 1.2 1088 */ 1089 public static final UnicodeBlock CONTROL_PICTURES = 1090 new UnicodeBlock("CONTROL_PICTURES", 1091 "CONTROL PICTURES", 1092 "CONTROLPICTURES"); 1093 1094 /** 1095 * Constant for the "Optical Character Recognition" Unicode character block. 1096 * @since 1.2 1097 */ 1098 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1099 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1100 "OPTICAL CHARACTER RECOGNITION", 1101 "OPTICALCHARACTERRECOGNITION"); 1102 1103 /** 1104 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1105 * @since 1.2 1106 */ 1107 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1108 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1109 "ENCLOSED ALPHANUMERICS", 1110 "ENCLOSEDALPHANUMERICS"); 1111 1112 /** 1113 * Constant for the "Box Drawing" Unicode character block. 1114 * @since 1.2 1115 */ 1116 public static final UnicodeBlock BOX_DRAWING = 1117 new UnicodeBlock("BOX_DRAWING", 1118 "BOX DRAWING", 1119 "BOXDRAWING"); 1120 1121 /** 1122 * Constant for the "Block Elements" Unicode character block. 
1123 * @since 1.2 1124 */ 1125 public static final UnicodeBlock BLOCK_ELEMENTS = 1126 new UnicodeBlock("BLOCK_ELEMENTS", 1127 "BLOCK ELEMENTS", 1128 "BLOCKELEMENTS"); 1129 1130 /** 1131 * Constant for the "Geometric Shapes" Unicode character block. 1132 * @since 1.2 1133 */ 1134 public static final UnicodeBlock GEOMETRIC_SHAPES = 1135 new UnicodeBlock("GEOMETRIC_SHAPES", 1136 "GEOMETRIC SHAPES", 1137 "GEOMETRICSHAPES"); 1138 1139 /** 1140 * Constant for the "Miscellaneous Symbols" Unicode character block. 1141 * @since 1.2 1142 */ 1143 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1144 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1145 "MISCELLANEOUS SYMBOLS", 1146 "MISCELLANEOUSSYMBOLS"); 1147 1148 /** 1149 * Constant for the "Dingbats" Unicode character block. 1150 * @since 1.2 1151 */ 1152 public static final UnicodeBlock DINGBATS = 1153 new UnicodeBlock("DINGBATS"); 1154 1155 /** 1156 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 1157 * @since 1.2 1158 */ 1159 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1160 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1161 "CJK SYMBOLS AND PUNCTUATION", 1162 "CJKSYMBOLSANDPUNCTUATION"); 1163 1164 /** 1165 * Constant for the "Hiragana" Unicode character block. 1166 * @since 1.2 1167 */ 1168 public static final UnicodeBlock HIRAGANA = 1169 new UnicodeBlock("HIRAGANA"); 1170 1171 /** 1172 * Constant for the "Katakana" Unicode character block. 1173 * @since 1.2 1174 */ 1175 public static final UnicodeBlock KATAKANA = 1176 new UnicodeBlock("KATAKANA"); 1177 1178 /** 1179 * Constant for the "Bopomofo" Unicode character block. 1180 * @since 1.2 1181 */ 1182 public static final UnicodeBlock BOPOMOFO = 1183 new UnicodeBlock("BOPOMOFO"); 1184 1185 /** 1186 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 
1187 * @since 1.2 1188 */ 1189 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1190 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1191 "HANGUL COMPATIBILITY JAMO", 1192 "HANGULCOMPATIBILITYJAMO"); 1193 1194 /** 1195 * Constant for the "Kanbun" Unicode character block. 1196 * @since 1.2 1197 */ 1198 public static final UnicodeBlock KANBUN = 1199 new UnicodeBlock("KANBUN"); 1200 1201 /** 1202 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 1203 * @since 1.2 1204 */ 1205 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1206 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1207 "ENCLOSED CJK LETTERS AND MONTHS", 1208 "ENCLOSEDCJKLETTERSANDMONTHS"); 1209 1210 /** 1211 * Constant for the "CJK Compatibility" Unicode character block. 1212 * @since 1.2 1213 */ 1214 public static final UnicodeBlock CJK_COMPATIBILITY = 1215 new UnicodeBlock("CJK_COMPATIBILITY", 1216 "CJK COMPATIBILITY", 1217 "CJKCOMPATIBILITY"); 1218 1219 /** 1220 * Constant for the "CJK Unified Ideographs" Unicode character block. 1221 * @since 1.2 1222 */ 1223 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1224 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1225 "CJK UNIFIED IDEOGRAPHS", 1226 "CJKUNIFIEDIDEOGRAPHS"); 1227 1228 /** 1229 * Constant for the "Hangul Syllables" Unicode character block. 1230 * @since 1.2 1231 */ 1232 public static final UnicodeBlock HANGUL_SYLLABLES = 1233 new UnicodeBlock("HANGUL_SYLLABLES", 1234 "HANGUL SYLLABLES", 1235 "HANGULSYLLABLES"); 1236 1237 /** 1238 * Constant for the "Private Use Area" Unicode character block. 1239 * @since 1.2 1240 */ 1241 public static final UnicodeBlock PRIVATE_USE_AREA = 1242 new UnicodeBlock("PRIVATE_USE_AREA", 1243 "PRIVATE USE AREA", 1244 "PRIVATEUSEAREA"); 1245 1246 /** 1247 * Constant for the "CJK Compatibility Ideographs" Unicode character 1248 * block. 
1249 * @since 1.2 1250 */ 1251 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1252 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1253 "CJK COMPATIBILITY IDEOGRAPHS", 1254 "CJKCOMPATIBILITYIDEOGRAPHS"); 1255 1256 /** 1257 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 1258 * @since 1.2 1259 */ 1260 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1261 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1262 "ALPHABETIC PRESENTATION FORMS", 1263 "ALPHABETICPRESENTATIONFORMS"); 1264 1265 /** 1266 * Constant for the "Arabic Presentation Forms-A" Unicode character 1267 * block. 1268 * @since 1.2 1269 */ 1270 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1271 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1272 "ARABIC PRESENTATION FORMS-A", 1273 "ARABICPRESENTATIONFORMS-A"); 1274 1275 /** 1276 * Constant for the "Combining Half Marks" Unicode character block. 1277 * @since 1.2 1278 */ 1279 public static final UnicodeBlock COMBINING_HALF_MARKS = 1280 new UnicodeBlock("COMBINING_HALF_MARKS", 1281 "COMBINING HALF MARKS", 1282 "COMBININGHALFMARKS"); 1283 1284 /** 1285 * Constant for the "CJK Compatibility Forms" Unicode character block. 1286 * @since 1.2 1287 */ 1288 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1289 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1290 "CJK COMPATIBILITY FORMS", 1291 "CJKCOMPATIBILITYFORMS"); 1292 1293 /** 1294 * Constant for the "Small Form Variants" Unicode character block. 1295 * @since 1.2 1296 */ 1297 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1298 new UnicodeBlock("SMALL_FORM_VARIANTS", 1299 "SMALL FORM VARIANTS", 1300 "SMALLFORMVARIANTS"); 1301 1302 /** 1303 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 
1304 * @since 1.2 1305 */ 1306 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1307 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1308 "ARABIC PRESENTATION FORMS-B", 1309 "ARABICPRESENTATIONFORMS-B"); 1310 1311 /** 1312 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1313 * block. 1314 * @since 1.2 1315 */ 1316 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1317 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1318 "HALFWIDTH AND FULLWIDTH FORMS", 1319 "HALFWIDTHANDFULLWIDTHFORMS"); 1320 1321 /** 1322 * Constant for the "Specials" Unicode character block. 1323 * @since 1.2 1324 */ 1325 public static final UnicodeBlock SPECIALS = 1326 new UnicodeBlock("SPECIALS"); 1327 1328 /** 1329 * @deprecated 1330 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES}, 1331 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}. 1332 * These constants match the block definitions of the Unicode Standard. 1333 * The {@link #of(char)} and {@link #of(int)} methods return the 1334 * standard constants. 1335 */ 1336 @Deprecated(since="1.5") 1337 public static final UnicodeBlock SURROGATES_AREA = 1338 new UnicodeBlock("SURROGATES_AREA"); 1339 1340 /** 1341 * Constant for the "Syriac" Unicode character block. 1342 * @since 1.4 1343 */ 1344 public static final UnicodeBlock SYRIAC = 1345 new UnicodeBlock("SYRIAC"); 1346 1347 /** 1348 * Constant for the "Thaana" Unicode character block. 1349 * @since 1.4 1350 */ 1351 public static final UnicodeBlock THAANA = 1352 new UnicodeBlock("THAANA"); 1353 1354 /** 1355 * Constant for the "Sinhala" Unicode character block. 1356 * @since 1.4 1357 */ 1358 public static final UnicodeBlock SINHALA = 1359 new UnicodeBlock("SINHALA"); 1360 1361 /** 1362 * Constant for the "Myanmar" Unicode character block. 
1363 * @since 1.4 1364 */ 1365 public static final UnicodeBlock MYANMAR = 1366 new UnicodeBlock("MYANMAR"); 1367 1368 /** 1369 * Constant for the "Ethiopic" Unicode character block. 1370 * @since 1.4 1371 */ 1372 public static final UnicodeBlock ETHIOPIC = 1373 new UnicodeBlock("ETHIOPIC"); 1374 1375 /** 1376 * Constant for the "Cherokee" Unicode character block. 1377 * @since 1.4 1378 */ 1379 public static final UnicodeBlock CHEROKEE = 1380 new UnicodeBlock("CHEROKEE"); 1381 1382 /** 1383 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1384 * @since 1.4 1385 */ 1386 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1387 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1388 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1389 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1390 1391 /** 1392 * Constant for the "Ogham" Unicode character block. 1393 * @since 1.4 1394 */ 1395 public static final UnicodeBlock OGHAM = 1396 new UnicodeBlock("OGHAM"); 1397 1398 /** 1399 * Constant for the "Runic" Unicode character block. 1400 * @since 1.4 1401 */ 1402 public static final UnicodeBlock RUNIC = 1403 new UnicodeBlock("RUNIC"); 1404 1405 /** 1406 * Constant for the "Khmer" Unicode character block. 1407 * @since 1.4 1408 */ 1409 public static final UnicodeBlock KHMER = 1410 new UnicodeBlock("KHMER"); 1411 1412 /** 1413 * Constant for the "Mongolian" Unicode character block. 1414 * @since 1.4 1415 */ 1416 public static final UnicodeBlock MONGOLIAN = 1417 new UnicodeBlock("MONGOLIAN"); 1418 1419 /** 1420 * Constant for the "Braille Patterns" Unicode character block. 1421 * @since 1.4 1422 */ 1423 public static final UnicodeBlock BRAILLE_PATTERNS = 1424 new UnicodeBlock("BRAILLE_PATTERNS", 1425 "BRAILLE PATTERNS", 1426 "BRAILLEPATTERNS"); 1427 1428 /** 1429 * Constant for the "CJK Radicals Supplement" Unicode character block. 
1430 * @since 1.4 1431 */ 1432 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1433 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1434 "CJK RADICALS SUPPLEMENT", 1435 "CJKRADICALSSUPPLEMENT"); 1436 1437 /** 1438 * Constant for the "Kangxi Radicals" Unicode character block. 1439 * @since 1.4 1440 */ 1441 public static final UnicodeBlock KANGXI_RADICALS = 1442 new UnicodeBlock("KANGXI_RADICALS", 1443 "KANGXI RADICALS", 1444 "KANGXIRADICALS"); 1445 1446 /** 1447 * Constant for the "Ideographic Description Characters" Unicode character block. 1448 * @since 1.4 1449 */ 1450 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1451 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1452 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1453 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1454 1455 /** 1456 * Constant for the "Bopomofo Extended" Unicode character block. 1457 * @since 1.4 1458 */ 1459 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1460 new UnicodeBlock("BOPOMOFO_EXTENDED", 1461 "BOPOMOFO EXTENDED", 1462 "BOPOMOFOEXTENDED"); 1463 1464 /** 1465 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 1466 * @since 1.4 1467 */ 1468 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1469 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1470 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1471 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1472 1473 /** 1474 * Constant for the "Yi Syllables" Unicode character block. 1475 * @since 1.4 1476 */ 1477 public static final UnicodeBlock YI_SYLLABLES = 1478 new UnicodeBlock("YI_SYLLABLES", 1479 "YI SYLLABLES", 1480 "YISYLLABLES"); 1481 1482 /** 1483 * Constant for the "Yi Radicals" Unicode character block. 1484 * @since 1.4 1485 */ 1486 public static final UnicodeBlock YI_RADICALS = 1487 new UnicodeBlock("YI_RADICALS", 1488 "YI RADICALS", 1489 "YIRADICALS"); 1490 1491 /** 1492 * Constant for the "Cyrillic Supplement" Unicode character block. 
1493 * This block was previously known as the "Cyrillic Supplementary" block. 1494 * @since 1.5 1495 */ 1496 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1497 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1498 "CYRILLIC SUPPLEMENTARY", 1499 "CYRILLICSUPPLEMENTARY", 1500 "CYRILLIC SUPPLEMENT", 1501 "CYRILLICSUPPLEMENT"); 1502 1503 /** 1504 * Constant for the "Tagalog" Unicode character block. 1505 * @since 1.5 1506 */ 1507 public static final UnicodeBlock TAGALOG = 1508 new UnicodeBlock("TAGALOG"); 1509 1510 /** 1511 * Constant for the "Hanunoo" Unicode character block. 1512 * @since 1.5 1513 */ 1514 public static final UnicodeBlock HANUNOO = 1515 new UnicodeBlock("HANUNOO"); 1516 1517 /** 1518 * Constant for the "Buhid" Unicode character block. 1519 * @since 1.5 1520 */ 1521 public static final UnicodeBlock BUHID = 1522 new UnicodeBlock("BUHID"); 1523 1524 /** 1525 * Constant for the "Tagbanwa" Unicode character block. 1526 * @since 1.5 1527 */ 1528 public static final UnicodeBlock TAGBANWA = 1529 new UnicodeBlock("TAGBANWA"); 1530 1531 /** 1532 * Constant for the "Limbu" Unicode character block. 1533 * @since 1.5 1534 */ 1535 public static final UnicodeBlock LIMBU = 1536 new UnicodeBlock("LIMBU"); 1537 1538 /** 1539 * Constant for the "Tai Le" Unicode character block. 1540 * @since 1.5 1541 */ 1542 public static final UnicodeBlock TAI_LE = 1543 new UnicodeBlock("TAI_LE", 1544 "TAI LE", 1545 "TAILE"); 1546 1547 /** 1548 * Constant for the "Khmer Symbols" Unicode character block. 1549 * @since 1.5 1550 */ 1551 public static final UnicodeBlock KHMER_SYMBOLS = 1552 new UnicodeBlock("KHMER_SYMBOLS", 1553 "KHMER SYMBOLS", 1554 "KHMERSYMBOLS"); 1555 1556 /** 1557 * Constant for the "Phonetic Extensions" Unicode character block. 
1558 * @since 1.5 1559 */ 1560 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1561 new UnicodeBlock("PHONETIC_EXTENSIONS", 1562 "PHONETIC EXTENSIONS", 1563 "PHONETICEXTENSIONS"); 1564 1565 /** 1566 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 1567 * @since 1.5 1568 */ 1569 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1570 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1571 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1572 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1573 1574 /** 1575 * Constant for the "Supplemental Arrows-A" Unicode character block. 1576 * @since 1.5 1577 */ 1578 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1579 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1580 "SUPPLEMENTAL ARROWS-A", 1581 "SUPPLEMENTALARROWS-A"); 1582 1583 /** 1584 * Constant for the "Supplemental Arrows-B" Unicode character block. 1585 * @since 1.5 1586 */ 1587 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1588 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1589 "SUPPLEMENTAL ARROWS-B", 1590 "SUPPLEMENTALARROWS-B"); 1591 1592 /** 1593 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1594 * character block. 1595 * @since 1.5 1596 */ 1597 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1598 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1599 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1600 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1601 1602 /** 1603 * Constant for the "Supplemental Mathematical Operators" Unicode 1604 * character block. 1605 * @since 1.5 1606 */ 1607 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1608 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1609 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1610 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1611 1612 /** 1613 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1614 * block. 
1615 * @since 1.5 1616 */ 1617 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1618 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1619 "MISCELLANEOUS SYMBOLS AND ARROWS", 1620 "MISCELLANEOUSSYMBOLSANDARROWS"); 1621 1622 /** 1623 * Constant for the "Katakana Phonetic Extensions" Unicode character 1624 * block. 1625 * @since 1.5 1626 */ 1627 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1628 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1629 "KATAKANA PHONETIC EXTENSIONS", 1630 "KATAKANAPHONETICEXTENSIONS"); 1631 1632 /** 1633 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1634 * @since 1.5 1635 */ 1636 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1637 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1638 "YIJING HEXAGRAM SYMBOLS", 1639 "YIJINGHEXAGRAMSYMBOLS"); 1640 1641 /** 1642 * Constant for the "Variation Selectors" Unicode character block. 1643 * @since 1.5 1644 */ 1645 public static final UnicodeBlock VARIATION_SELECTORS = 1646 new UnicodeBlock("VARIATION_SELECTORS", 1647 "VARIATION SELECTORS", 1648 "VARIATIONSELECTORS"); 1649 1650 /** 1651 * Constant for the "Linear B Syllabary" Unicode character block. 1652 * @since 1.5 1653 */ 1654 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1655 new UnicodeBlock("LINEAR_B_SYLLABARY", 1656 "LINEAR B SYLLABARY", 1657 "LINEARBSYLLABARY"); 1658 1659 /** 1660 * Constant for the "Linear B Ideograms" Unicode character block. 1661 * @since 1.5 1662 */ 1663 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1664 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1665 "LINEAR B IDEOGRAMS", 1666 "LINEARBIDEOGRAMS"); 1667 1668 /** 1669 * Constant for the "Aegean Numbers" Unicode character block. 1670 * @since 1.5 1671 */ 1672 public static final UnicodeBlock AEGEAN_NUMBERS = 1673 new UnicodeBlock("AEGEAN_NUMBERS", 1674 "AEGEAN NUMBERS", 1675 "AEGEANNUMBERS"); 1676 1677 /** 1678 * Constant for the "Old Italic" Unicode character block. 
1679 * @since 1.5 1680 */ 1681 public static final UnicodeBlock OLD_ITALIC = 1682 new UnicodeBlock("OLD_ITALIC", 1683 "OLD ITALIC", 1684 "OLDITALIC"); 1685 1686 /** 1687 * Constant for the "Gothic" Unicode character block. 1688 * @since 1.5 1689 */ 1690 public static final UnicodeBlock GOTHIC = 1691 new UnicodeBlock("GOTHIC"); 1692 1693 /** 1694 * Constant for the "Ugaritic" Unicode character block. 1695 * @since 1.5 1696 */ 1697 public static final UnicodeBlock UGARITIC = 1698 new UnicodeBlock("UGARITIC"); 1699 1700 /** 1701 * Constant for the "Deseret" Unicode character block. 1702 * @since 1.5 1703 */ 1704 public static final UnicodeBlock DESERET = 1705 new UnicodeBlock("DESERET"); 1706 1707 /** 1708 * Constant for the "Shavian" Unicode character block. 1709 * @since 1.5 1710 */ 1711 public static final UnicodeBlock SHAVIAN = 1712 new UnicodeBlock("SHAVIAN"); 1713 1714 /** 1715 * Constant for the "Osmanya" Unicode character block. 1716 * @since 1.5 1717 */ 1718 public static final UnicodeBlock OSMANYA = 1719 new UnicodeBlock("OSMANYA"); 1720 1721 /** 1722 * Constant for the "Cypriot Syllabary" Unicode character block. 1723 * @since 1.5 1724 */ 1725 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1726 new UnicodeBlock("CYPRIOT_SYLLABARY", 1727 "CYPRIOT SYLLABARY", 1728 "CYPRIOTSYLLABARY"); 1729 1730 /** 1731 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1732 * @since 1.5 1733 */ 1734 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1735 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1736 "BYZANTINE MUSICAL SYMBOLS", 1737 "BYZANTINEMUSICALSYMBOLS"); 1738 1739 /** 1740 * Constant for the "Musical Symbols" Unicode character block. 1741 * @since 1.5 1742 */ 1743 public static final UnicodeBlock MUSICAL_SYMBOLS = 1744 new UnicodeBlock("MUSICAL_SYMBOLS", 1745 "MUSICAL SYMBOLS", 1746 "MUSICALSYMBOLS"); 1747 1748 /** 1749 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 
1750 * @since 1.5 1751 */ 1752 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1753 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1754 "TAI XUAN JING SYMBOLS", 1755 "TAIXUANJINGSYMBOLS"); 1756 1757 /** 1758 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1759 * character block. 1760 * @since 1.5 1761 */ 1762 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1763 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1764 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1765 "MATHEMATICALALPHANUMERICSYMBOLS"); 1766 1767 /** 1768 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1769 * character block. 1770 * @since 1.5 1771 */ 1772 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1773 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1774 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1775 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1776 1777 /** 1778 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 1779 * @since 1.5 1780 */ 1781 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1782 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1783 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1784 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1785 1786 /** 1787 * Constant for the "Tags" Unicode character block. 1788 * @since 1.5 1789 */ 1790 public static final UnicodeBlock TAGS = 1791 new UnicodeBlock("TAGS"); 1792 1793 /** 1794 * Constant for the "Variation Selectors Supplement" Unicode character 1795 * block. 1796 * @since 1.5 1797 */ 1798 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1799 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1800 "VARIATION SELECTORS SUPPLEMENT", 1801 "VARIATIONSELECTORSSUPPLEMENT"); 1802 1803 /** 1804 * Constant for the "Supplementary Private Use Area-A" Unicode character 1805 * block. 
1806 * @since 1.5 1807 */ 1808 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1809 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1810 "SUPPLEMENTARY PRIVATE USE AREA-A", 1811 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1812 1813 /** 1814 * Constant for the "Supplementary Private Use Area-B" Unicode character 1815 * block. 1816 * @since 1.5 1817 */ 1818 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1819 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1820 "SUPPLEMENTARY PRIVATE USE AREA-B", 1821 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1822 1823 /** 1824 * Constant for the "High Surrogates" Unicode character block. 1825 * This block represents codepoint values in the high surrogate 1826 * range: U+D800 through U+DB7F 1827 * 1828 * @since 1.5 1829 */ 1830 public static final UnicodeBlock HIGH_SURROGATES = 1831 new UnicodeBlock("HIGH_SURROGATES", 1832 "HIGH SURROGATES", 1833 "HIGHSURROGATES"); 1834 1835 /** 1836 * Constant for the "High Private Use Surrogates" Unicode character 1837 * block. 1838 * This block represents codepoint values in the private use high 1839 * surrogate range: U+DB80 through U+DBFF 1840 * 1841 * @since 1.5 1842 */ 1843 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1844 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1845 "HIGH PRIVATE USE SURROGATES", 1846 "HIGHPRIVATEUSESURROGATES"); 1847 1848 /** 1849 * Constant for the "Low Surrogates" Unicode character block. 1850 * This block represents codepoint values in the low surrogate 1851 * range: U+DC00 through U+DFFF 1852 * 1853 * @since 1.5 1854 */ 1855 public static final UnicodeBlock LOW_SURROGATES = 1856 new UnicodeBlock("LOW_SURROGATES", 1857 "LOW SURROGATES", 1858 "LOWSURROGATES"); 1859 1860 /** 1861 * Constant for the "Arabic Supplement" Unicode character block. 
1862 * @since 1.7 1863 */ 1864 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1865 new UnicodeBlock("ARABIC_SUPPLEMENT", 1866 "ARABIC SUPPLEMENT", 1867 "ARABICSUPPLEMENT"); 1868 1869 /** 1870 * Constant for the "NKo" Unicode character block. 1871 * @since 1.7 1872 */ 1873 public static final UnicodeBlock NKO = 1874 new UnicodeBlock("NKO"); 1875 1876 /** 1877 * Constant for the "Samaritan" Unicode character block. 1878 * @since 1.7 1879 */ 1880 public static final UnicodeBlock SAMARITAN = 1881 new UnicodeBlock("SAMARITAN"); 1882 1883 /** 1884 * Constant for the "Mandaic" Unicode character block. 1885 * @since 1.7 1886 */ 1887 public static final UnicodeBlock MANDAIC = 1888 new UnicodeBlock("MANDAIC"); 1889 1890 /** 1891 * Constant for the "Ethiopic Supplement" Unicode character block. 1892 * @since 1.7 1893 */ 1894 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1895 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1896 "ETHIOPIC SUPPLEMENT", 1897 "ETHIOPICSUPPLEMENT"); 1898 1899 /** 1900 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1901 * Unicode character block. 1902 * @since 1.7 1903 */ 1904 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1905 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1906 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1907 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1908 1909 /** 1910 * Constant for the "New Tai Lue" Unicode character block. 1911 * @since 1.7 1912 */ 1913 public static final UnicodeBlock NEW_TAI_LUE = 1914 new UnicodeBlock("NEW_TAI_LUE", 1915 "NEW TAI LUE", 1916 "NEWTAILUE"); 1917 1918 /** 1919 * Constant for the "Buginese" Unicode character block. 1920 * @since 1.7 1921 */ 1922 public static final UnicodeBlock BUGINESE = 1923 new UnicodeBlock("BUGINESE"); 1924 1925 /** 1926 * Constant for the "Tai Tham" Unicode character block. 
1927 * @since 1.7 1928 */ 1929 public static final UnicodeBlock TAI_THAM = 1930 new UnicodeBlock("TAI_THAM", 1931 "TAI THAM", 1932 "TAITHAM"); 1933 1934 /** 1935 * Constant for the "Balinese" Unicode character block. 1936 * @since 1.7 1937 */ 1938 public static final UnicodeBlock BALINESE = 1939 new UnicodeBlock("BALINESE"); 1940 1941 /** 1942 * Constant for the "Sundanese" Unicode character block. 1943 * @since 1.7 1944 */ 1945 public static final UnicodeBlock SUNDANESE = 1946 new UnicodeBlock("SUNDANESE"); 1947 1948 /** 1949 * Constant for the "Batak" Unicode character block. 1950 * @since 1.7 1951 */ 1952 public static final UnicodeBlock BATAK = 1953 new UnicodeBlock("BATAK"); 1954 1955 /** 1956 * Constant for the "Lepcha" Unicode character block. 1957 * @since 1.7 1958 */ 1959 public static final UnicodeBlock LEPCHA = 1960 new UnicodeBlock("LEPCHA"); 1961 1962 /** 1963 * Constant for the "Ol Chiki" Unicode character block. 1964 * @since 1.7 1965 */ 1966 public static final UnicodeBlock OL_CHIKI = 1967 new UnicodeBlock("OL_CHIKI", 1968 "OL CHIKI", 1969 "OLCHIKI"); 1970 1971 /** 1972 * Constant for the "Vedic Extensions" Unicode character block. 1973 * @since 1.7 1974 */ 1975 public static final UnicodeBlock VEDIC_EXTENSIONS = 1976 new UnicodeBlock("VEDIC_EXTENSIONS", 1977 "VEDIC EXTENSIONS", 1978 "VEDICEXTENSIONS"); 1979 1980 /** 1981 * Constant for the "Phonetic Extensions Supplement" Unicode character 1982 * block. 1983 * @since 1.7 1984 */ 1985 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1986 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1987 "PHONETIC EXTENSIONS SUPPLEMENT", 1988 "PHONETICEXTENSIONSSUPPLEMENT"); 1989 1990 /** 1991 * Constant for the "Combining Diacritical Marks Supplement" Unicode 1992 * character block. 
1993 * @since 1.7 1994 */ 1995 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1996 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1997 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 1998 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 1999 2000 /** 2001 * Constant for the "Glagolitic" Unicode character block. 2002 * @since 1.7 2003 */ 2004 public static final UnicodeBlock GLAGOLITIC = 2005 new UnicodeBlock("GLAGOLITIC"); 2006 2007 /** 2008 * Constant for the "Latin Extended-C" Unicode character block. 2009 * @since 1.7 2010 */ 2011 public static final UnicodeBlock LATIN_EXTENDED_C = 2012 new UnicodeBlock("LATIN_EXTENDED_C", 2013 "LATIN EXTENDED-C", 2014 "LATINEXTENDED-C"); 2015 2016 /** 2017 * Constant for the "Coptic" Unicode character block. 2018 * @since 1.7 2019 */ 2020 public static final UnicodeBlock COPTIC = 2021 new UnicodeBlock("COPTIC"); 2022 2023 /** 2024 * Constant for the "Georgian Supplement" Unicode character block. 2025 * @since 1.7 2026 */ 2027 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 2028 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 2029 "GEORGIAN SUPPLEMENT", 2030 "GEORGIANSUPPLEMENT"); 2031 2032 /** 2033 * Constant for the "Tifinagh" Unicode character block. 2034 * @since 1.7 2035 */ 2036 public static final UnicodeBlock TIFINAGH = 2037 new UnicodeBlock("TIFINAGH"); 2038 2039 /** 2040 * Constant for the "Ethiopic Extended" Unicode character block. 2041 * @since 1.7 2042 */ 2043 public static final UnicodeBlock ETHIOPIC_EXTENDED = 2044 new UnicodeBlock("ETHIOPIC_EXTENDED", 2045 "ETHIOPIC EXTENDED", 2046 "ETHIOPICEXTENDED"); 2047 2048 /** 2049 * Constant for the "Cyrillic Extended-A" Unicode character block. 2050 * @since 1.7 2051 */ 2052 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2053 new UnicodeBlock("CYRILLIC_EXTENDED_A", 2054 "CYRILLIC EXTENDED-A", 2055 "CYRILLICEXTENDED-A"); 2056 2057 /** 2058 * Constant for the "Supplemental Punctuation" Unicode character block. 
2059 * @since 1.7 2060 */ 2061 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2062 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 2063 "SUPPLEMENTAL PUNCTUATION", 2064 "SUPPLEMENTALPUNCTUATION"); 2065 2066 /** 2067 * Constant for the "CJK Strokes" Unicode character block. 2068 * @since 1.7 2069 */ 2070 public static final UnicodeBlock CJK_STROKES = 2071 new UnicodeBlock("CJK_STROKES", 2072 "CJK STROKES", 2073 "CJKSTROKES"); 2074 2075 /** 2076 * Constant for the "Lisu" Unicode character block. 2077 * @since 1.7 2078 */ 2079 public static final UnicodeBlock LISU = 2080 new UnicodeBlock("LISU"); 2081 2082 /** 2083 * Constant for the "Vai" Unicode character block. 2084 * @since 1.7 2085 */ 2086 public static final UnicodeBlock VAI = 2087 new UnicodeBlock("VAI"); 2088 2089 /** 2090 * Constant for the "Cyrillic Extended-B" Unicode character block. 2091 * @since 1.7 2092 */ 2093 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2094 new UnicodeBlock("CYRILLIC_EXTENDED_B", 2095 "CYRILLIC EXTENDED-B", 2096 "CYRILLICEXTENDED-B"); 2097 2098 /** 2099 * Constant for the "Bamum" Unicode character block. 2100 * @since 1.7 2101 */ 2102 public static final UnicodeBlock BAMUM = 2103 new UnicodeBlock("BAMUM"); 2104 2105 /** 2106 * Constant for the "Modifier Tone Letters" Unicode character block. 2107 * @since 1.7 2108 */ 2109 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2110 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2111 "MODIFIER TONE LETTERS", 2112 "MODIFIERTONELETTERS"); 2113 2114 /** 2115 * Constant for the "Latin Extended-D" Unicode character block. 2116 * @since 1.7 2117 */ 2118 public static final UnicodeBlock LATIN_EXTENDED_D = 2119 new UnicodeBlock("LATIN_EXTENDED_D", 2120 "LATIN EXTENDED-D", 2121 "LATINEXTENDED-D"); 2122 2123 /** 2124 * Constant for the "Syloti Nagri" Unicode character block. 
2125 * @since 1.7 2126 */ 2127 public static final UnicodeBlock SYLOTI_NAGRI = 2128 new UnicodeBlock("SYLOTI_NAGRI", 2129 "SYLOTI NAGRI", 2130 "SYLOTINAGRI"); 2131 2132 /** 2133 * Constant for the "Common Indic Number Forms" Unicode character block. 2134 * @since 1.7 2135 */ 2136 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2137 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2138 "COMMON INDIC NUMBER FORMS", 2139 "COMMONINDICNUMBERFORMS"); 2140 2141 /** 2142 * Constant for the "Phags-pa" Unicode character block. 2143 * @since 1.7 2144 */ 2145 public static final UnicodeBlock PHAGS_PA = 2146 new UnicodeBlock("PHAGS_PA", 2147 "PHAGS-PA"); 2148 2149 /** 2150 * Constant for the "Saurashtra" Unicode character block. 2151 * @since 1.7 2152 */ 2153 public static final UnicodeBlock SAURASHTRA = 2154 new UnicodeBlock("SAURASHTRA"); 2155 2156 /** 2157 * Constant for the "Devanagari Extended" Unicode character block. 2158 * @since 1.7 2159 */ 2160 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2161 new UnicodeBlock("DEVANAGARI_EXTENDED", 2162 "DEVANAGARI EXTENDED", 2163 "DEVANAGARIEXTENDED"); 2164 2165 /** 2166 * Constant for the "Kayah Li" Unicode character block. 2167 * @since 1.7 2168 */ 2169 public static final UnicodeBlock KAYAH_LI = 2170 new UnicodeBlock("KAYAH_LI", 2171 "KAYAH LI", 2172 "KAYAHLI"); 2173 2174 /** 2175 * Constant for the "Rejang" Unicode character block. 2176 * @since 1.7 2177 */ 2178 public static final UnicodeBlock REJANG = 2179 new UnicodeBlock("REJANG"); 2180 2181 /** 2182 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2183 * @since 1.7 2184 */ 2185 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2186 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2187 "HANGUL JAMO EXTENDED-A", 2188 "HANGULJAMOEXTENDED-A"); 2189 2190 /** 2191 * Constant for the "Javanese" Unicode character block. 
2192 * @since 1.7 2193 */ 2194 public static final UnicodeBlock JAVANESE = 2195 new UnicodeBlock("JAVANESE"); 2196 2197 /** 2198 * Constant for the "Cham" Unicode character block. 2199 * @since 1.7 2200 */ 2201 public static final UnicodeBlock CHAM = 2202 new UnicodeBlock("CHAM"); 2203 2204 /** 2205 * Constant for the "Myanmar Extended-A" Unicode character block. 2206 * @since 1.7 2207 */ 2208 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2209 new UnicodeBlock("MYANMAR_EXTENDED_A", 2210 "MYANMAR EXTENDED-A", 2211 "MYANMAREXTENDED-A"); 2212 2213 /** 2214 * Constant for the "Tai Viet" Unicode character block. 2215 * @since 1.7 2216 */ 2217 public static final UnicodeBlock TAI_VIET = 2218 new UnicodeBlock("TAI_VIET", 2219 "TAI VIET", 2220 "TAIVIET"); 2221 2222 /** 2223 * Constant for the "Ethiopic Extended-A" Unicode character block. 2224 * @since 1.7 2225 */ 2226 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2227 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2228 "ETHIOPIC EXTENDED-A", 2229 "ETHIOPICEXTENDED-A"); 2230 2231 /** 2232 * Constant for the "Meetei Mayek" Unicode character block. 2233 * @since 1.7 2234 */ 2235 public static final UnicodeBlock MEETEI_MAYEK = 2236 new UnicodeBlock("MEETEI_MAYEK", 2237 "MEETEI MAYEK", 2238 "MEETEIMAYEK"); 2239 2240 /** 2241 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2242 * @since 1.7 2243 */ 2244 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2245 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2246 "HANGUL JAMO EXTENDED-B", 2247 "HANGULJAMOEXTENDED-B"); 2248 2249 /** 2250 * Constant for the "Vertical Forms" Unicode character block. 2251 * @since 1.7 2252 */ 2253 public static final UnicodeBlock VERTICAL_FORMS = 2254 new UnicodeBlock("VERTICAL_FORMS", 2255 "VERTICAL FORMS", 2256 "VERTICALFORMS"); 2257 2258 /** 2259 * Constant for the "Ancient Greek Numbers" Unicode character block. 
2260 * @since 1.7 2261 */ 2262 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2263 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2264 "ANCIENT GREEK NUMBERS", 2265 "ANCIENTGREEKNUMBERS"); 2266 2267 /** 2268 * Constant for the "Ancient Symbols" Unicode character block. 2269 * @since 1.7 2270 */ 2271 public static final UnicodeBlock ANCIENT_SYMBOLS = 2272 new UnicodeBlock("ANCIENT_SYMBOLS", 2273 "ANCIENT SYMBOLS", 2274 "ANCIENTSYMBOLS"); 2275 2276 /** 2277 * Constant for the "Phaistos Disc" Unicode character block. 2278 * @since 1.7 2279 */ 2280 public static final UnicodeBlock PHAISTOS_DISC = 2281 new UnicodeBlock("PHAISTOS_DISC", 2282 "PHAISTOS DISC", 2283 "PHAISTOSDISC"); 2284 2285 /** 2286 * Constant for the "Lycian" Unicode character block. 2287 * @since 1.7 2288 */ 2289 public static final UnicodeBlock LYCIAN = 2290 new UnicodeBlock("LYCIAN"); 2291 2292 /** 2293 * Constant for the "Carian" Unicode character block. 2294 * @since 1.7 2295 */ 2296 public static final UnicodeBlock CARIAN = 2297 new UnicodeBlock("CARIAN"); 2298 2299 /** 2300 * Constant for the "Old Persian" Unicode character block. 2301 * @since 1.7 2302 */ 2303 public static final UnicodeBlock OLD_PERSIAN = 2304 new UnicodeBlock("OLD_PERSIAN", 2305 "OLD PERSIAN", 2306 "OLDPERSIAN"); 2307 2308 /** 2309 * Constant for the "Imperial Aramaic" Unicode character block. 2310 * @since 1.7 2311 */ 2312 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2313 new UnicodeBlock("IMPERIAL_ARAMAIC", 2314 "IMPERIAL ARAMAIC", 2315 "IMPERIALARAMAIC"); 2316 2317 /** 2318 * Constant for the "Phoenician" Unicode character block. 2319 * @since 1.7 2320 */ 2321 public static final UnicodeBlock PHOENICIAN = 2322 new UnicodeBlock("PHOENICIAN"); 2323 2324 /** 2325 * Constant for the "Lydian" Unicode character block. 2326 * @since 1.7 2327 */ 2328 public static final UnicodeBlock LYDIAN = 2329 new UnicodeBlock("LYDIAN"); 2330 2331 /** 2332 * Constant for the "Kharoshthi" Unicode character block. 
2333 * @since 1.7 2334 */ 2335 public static final UnicodeBlock KHAROSHTHI = 2336 new UnicodeBlock("KHAROSHTHI"); 2337 2338 /** 2339 * Constant for the "Old South Arabian" Unicode character block. 2340 * @since 1.7 2341 */ 2342 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2343 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2344 "OLD SOUTH ARABIAN", 2345 "OLDSOUTHARABIAN"); 2346 2347 /** 2348 * Constant for the "Avestan" Unicode character block. 2349 * @since 1.7 2350 */ 2351 public static final UnicodeBlock AVESTAN = 2352 new UnicodeBlock("AVESTAN"); 2353 2354 /** 2355 * Constant for the "Inscriptional Parthian" Unicode character block. 2356 * @since 1.7 2357 */ 2358 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2359 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2360 "INSCRIPTIONAL PARTHIAN", 2361 "INSCRIPTIONALPARTHIAN"); 2362 2363 /** 2364 * Constant for the "Inscriptional Pahlavi" Unicode character block. 2365 * @since 1.7 2366 */ 2367 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2368 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2369 "INSCRIPTIONAL PAHLAVI", 2370 "INSCRIPTIONALPAHLAVI"); 2371 2372 /** 2373 * Constant for the "Old Turkic" Unicode character block. 2374 * @since 1.7 2375 */ 2376 public static final UnicodeBlock OLD_TURKIC = 2377 new UnicodeBlock("OLD_TURKIC", 2378 "OLD TURKIC", 2379 "OLDTURKIC"); 2380 2381 /** 2382 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2383 * @since 1.7 2384 */ 2385 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2386 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2387 "RUMI NUMERAL SYMBOLS", 2388 "RUMINUMERALSYMBOLS"); 2389 2390 /** 2391 * Constant for the "Brahmi" Unicode character block. 2392 * @since 1.7 2393 */ 2394 public static final UnicodeBlock BRAHMI = 2395 new UnicodeBlock("BRAHMI"); 2396 2397 /** 2398 * Constant for the "Kaithi" Unicode character block. 
2399 * @since 1.7 2400 */ 2401 public static final UnicodeBlock KAITHI = 2402 new UnicodeBlock("KAITHI"); 2403 2404 /** 2405 * Constant for the "Cuneiform" Unicode character block. 2406 * @since 1.7 2407 */ 2408 public static final UnicodeBlock CUNEIFORM = 2409 new UnicodeBlock("CUNEIFORM"); 2410 2411 /** 2412 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2413 * character block. 2414 * @since 1.7 2415 */ 2416 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2417 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2418 "CUNEIFORM NUMBERS AND PUNCTUATION", 2419 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2420 2421 /** 2422 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2423 * @since 1.7 2424 */ 2425 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2426 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2427 "EGYPTIAN HIEROGLYPHS", 2428 "EGYPTIANHIEROGLYPHS"); 2429 2430 /** 2431 * Constant for the "Bamum Supplement" Unicode character block. 2432 * @since 1.7 2433 */ 2434 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2435 new UnicodeBlock("BAMUM_SUPPLEMENT", 2436 "BAMUM SUPPLEMENT", 2437 "BAMUMSUPPLEMENT"); 2438 2439 /** 2440 * Constant for the "Kana Supplement" Unicode character block. 2441 * @since 1.7 2442 */ 2443 public static final UnicodeBlock KANA_SUPPLEMENT = 2444 new UnicodeBlock("KANA_SUPPLEMENT", 2445 "KANA SUPPLEMENT", 2446 "KANASUPPLEMENT"); 2447 2448 /** 2449 * Constant for the "Ancient Greek Musical Notation" Unicode character 2450 * block. 2451 * @since 1.7 2452 */ 2453 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2454 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2455 "ANCIENT GREEK MUSICAL NOTATION", 2456 "ANCIENTGREEKMUSICALNOTATION"); 2457 2458 /** 2459 * Constant for the "Counting Rod Numerals" Unicode character block. 
2460 * @since 1.7 2461 */ 2462 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2463 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2464 "COUNTING ROD NUMERALS", 2465 "COUNTINGRODNUMERALS"); 2466 2467 /** 2468 * Constant for the "Mahjong Tiles" Unicode character block. 2469 * @since 1.7 2470 */ 2471 public static final UnicodeBlock MAHJONG_TILES = 2472 new UnicodeBlock("MAHJONG_TILES", 2473 "MAHJONG TILES", 2474 "MAHJONGTILES"); 2475 2476 /** 2477 * Constant for the "Domino Tiles" Unicode character block. 2478 * @since 1.7 2479 */ 2480 public static final UnicodeBlock DOMINO_TILES = 2481 new UnicodeBlock("DOMINO_TILES", 2482 "DOMINO TILES", 2483 "DOMINOTILES"); 2484 2485 /** 2486 * Constant for the "Playing Cards" Unicode character block. 2487 * @since 1.7 2488 */ 2489 public static final UnicodeBlock PLAYING_CARDS = 2490 new UnicodeBlock("PLAYING_CARDS", 2491 "PLAYING CARDS", 2492 "PLAYINGCARDS"); 2493 2494 /** 2495 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2496 * block. 2497 * @since 1.7 2498 */ 2499 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2500 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2501 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2502 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2503 2504 /** 2505 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2506 * block. 2507 * @since 1.7 2508 */ 2509 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2510 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2511 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2512 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2513 2514 /** 2515 * Constant for the "Miscellaneous Symbols and Pictographs" Unicode 2516 * character block.
2517 * @since 1.7 2518 */ 2519 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2520 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2521 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2522 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2523 2524 /** 2525 * Constant for the "Emoticons" Unicode character block. 2526 * @since 1.7 2527 */ 2528 public static final UnicodeBlock EMOTICONS = 2529 new UnicodeBlock("EMOTICONS"); 2530 2531 /** 2532 * Constant for the "Transport and Map Symbols" Unicode character block. 2533 * @since 1.7 2534 */ 2535 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2536 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2537 "TRANSPORT AND MAP SYMBOLS", 2538 "TRANSPORTANDMAPSYMBOLS"); 2539 2540 /** 2541 * Constant for the "Alchemical Symbols" Unicode character block. 2542 * @since 1.7 2543 */ 2544 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2545 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2546 "ALCHEMICAL SYMBOLS", 2547 "ALCHEMICALSYMBOLS"); 2548 2549 /** 2550 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2551 * character block. 2552 * @since 1.7 2553 */ 2554 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2555 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2556 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2557 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2558 2559 /** 2560 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2561 * character block. 2562 * @since 1.7 2563 */ 2564 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2565 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2566 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2567 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2568 2569 /** 2570 * Constant for the "Arabic Extended-A" Unicode character block.
2571 * @since 1.8 2572 */ 2573 public static final UnicodeBlock ARABIC_EXTENDED_A = 2574 new UnicodeBlock("ARABIC_EXTENDED_A", 2575 "ARABIC EXTENDED-A", 2576 "ARABICEXTENDED-A"); 2577 2578 /** 2579 * Constant for the "Sundanese Supplement" Unicode character block. 2580 * @since 1.8 2581 */ 2582 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2583 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2584 "SUNDANESE SUPPLEMENT", 2585 "SUNDANESESUPPLEMENT"); 2586 2587 /** 2588 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2589 * @since 1.8 2590 */ 2591 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2592 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2593 "MEETEI MAYEK EXTENSIONS", 2594 "MEETEIMAYEKEXTENSIONS"); 2595 2596 /** 2597 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2598 * @since 1.8 2599 */ 2600 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2601 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2602 "MEROITIC HIEROGLYPHS", 2603 "MEROITICHIEROGLYPHS"); 2604 2605 /** 2606 * Constant for the "Meroitic Cursive" Unicode character block. 2607 * @since 1.8 2608 */ 2609 public static final UnicodeBlock MEROITIC_CURSIVE = 2610 new UnicodeBlock("MEROITIC_CURSIVE", 2611 "MEROITIC CURSIVE", 2612 "MEROITICCURSIVE"); 2613 2614 /** 2615 * Constant for the "Sora Sompeng" Unicode character block. 2616 * @since 1.8 2617 */ 2618 public static final UnicodeBlock SORA_SOMPENG = 2619 new UnicodeBlock("SORA_SOMPENG", 2620 "SORA SOMPENG", 2621 "SORASOMPENG"); 2622 2623 /** 2624 * Constant for the "Chakma" Unicode character block. 2625 * @since 1.8 2626 */ 2627 public static final UnicodeBlock CHAKMA = 2628 new UnicodeBlock("CHAKMA"); 2629 2630 /** 2631 * Constant for the "Sharada" Unicode character block. 2632 * @since 1.8 2633 */ 2634 public static final UnicodeBlock SHARADA = 2635 new UnicodeBlock("SHARADA"); 2636 2637 /** 2638 * Constant for the "Takri" Unicode character block. 
2639 * @since 1.8 2640 */ 2641 public static final UnicodeBlock TAKRI = 2642 new UnicodeBlock("TAKRI"); 2643 2644 /** 2645 * Constant for the "Miao" Unicode character block. 2646 * @since 1.8 2647 */ 2648 public static final UnicodeBlock MIAO = 2649 new UnicodeBlock("MIAO"); 2650 2651 /** 2652 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2653 * character block. 2654 * @since 1.8 2655 */ 2656 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2657 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2658 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2659 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2660 2661 /** 2662 * Constant for the "Combining Diacritical Marks Extended" Unicode 2663 * character block. 2664 * @since 9 2665 */ 2666 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2667 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", 2668 "COMBINING DIACRITICAL MARKS EXTENDED", 2669 "COMBININGDIACRITICALMARKSEXTENDED"); 2670 2671 /** 2672 * Constant for the "Myanmar Extended-B" Unicode character block. 2673 * @since 9 2674 */ 2675 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2676 new UnicodeBlock("MYANMAR_EXTENDED_B", 2677 "MYANMAR EXTENDED-B", 2678 "MYANMAREXTENDED-B"); 2679 2680 /** 2681 * Constant for the "Latin Extended-E" Unicode character block. 2682 * @since 9 2683 */ 2684 public static final UnicodeBlock LATIN_EXTENDED_E = 2685 new UnicodeBlock("LATIN_EXTENDED_E", 2686 "LATIN EXTENDED-E", 2687 "LATINEXTENDED-E"); 2688 2689 /** 2690 * Constant for the "Coptic Epact Numbers" Unicode character block. 2691 * @since 9 2692 */ 2693 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2694 new UnicodeBlock("COPTIC_EPACT_NUMBERS", 2695 "COPTIC EPACT NUMBERS", 2696 "COPTICEPACTNUMBERS"); 2697 2698 /** 2699 * Constant for the "Old Permic" Unicode character block. 
2700 * @since 9 2701 */ 2702 public static final UnicodeBlock OLD_PERMIC = 2703 new UnicodeBlock("OLD_PERMIC", 2704 "OLD PERMIC", 2705 "OLDPERMIC"); 2706 2707 /** 2708 * Constant for the "Elbasan" Unicode character block. 2709 * @since 9 2710 */ 2711 public static final UnicodeBlock ELBASAN = 2712 new UnicodeBlock("ELBASAN"); 2713 2714 /** 2715 * Constant for the "Caucasian Albanian" Unicode character block. 2716 * @since 9 2717 */ 2718 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2719 new UnicodeBlock("CAUCASIAN_ALBANIAN", 2720 "CAUCASIAN ALBANIAN", 2721 "CAUCASIANALBANIAN"); 2722 2723 /** 2724 * Constant for the "Linear A" Unicode character block. 2725 * @since 9 2726 */ 2727 public static final UnicodeBlock LINEAR_A = 2728 new UnicodeBlock("LINEAR_A", 2729 "LINEAR A", 2730 "LINEARA"); 2731 2732 /** 2733 * Constant for the "Palmyrene" Unicode character block. 2734 * @since 9 2735 */ 2736 public static final UnicodeBlock PALMYRENE = 2737 new UnicodeBlock("PALMYRENE"); 2738 2739 /** 2740 * Constant for the "Nabataean" Unicode character block. 2741 * @since 9 2742 */ 2743 public static final UnicodeBlock NABATAEAN = 2744 new UnicodeBlock("NABATAEAN"); 2745 2746 /** 2747 * Constant for the "Old North Arabian" Unicode character block. 2748 * @since 9 2749 */ 2750 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2751 new UnicodeBlock("OLD_NORTH_ARABIAN", 2752 "OLD NORTH ARABIAN", 2753 "OLDNORTHARABIAN"); 2754 2755 /** 2756 * Constant for the "Manichaean" Unicode character block. 2757 * @since 9 2758 */ 2759 public static final UnicodeBlock MANICHAEAN = 2760 new UnicodeBlock("MANICHAEAN"); 2761 2762 /** 2763 * Constant for the "Psalter Pahlavi" Unicode character block. 2764 * @since 9 2765 */ 2766 public static final UnicodeBlock PSALTER_PAHLAVI = 2767 new UnicodeBlock("PSALTER_PAHLAVI", 2768 "PSALTER PAHLAVI", 2769 "PSALTERPAHLAVI"); 2770 2771 /** 2772 * Constant for the "Mahajani" Unicode character block. 
2773 * @since 9 2774 */ 2775 public static final UnicodeBlock MAHAJANI = 2776 new UnicodeBlock("MAHAJANI"); 2777 2778 /** 2779 * Constant for the "Sinhala Archaic Numbers" Unicode character block. 2780 * @since 9 2781 */ 2782 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2783 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", 2784 "SINHALA ARCHAIC NUMBERS", 2785 "SINHALAARCHAICNUMBERS"); 2786 2787 /** 2788 * Constant for the "Khojki" Unicode character block. 2789 * @since 9 2790 */ 2791 public static final UnicodeBlock KHOJKI = 2792 new UnicodeBlock("KHOJKI"); 2793 2794 /** 2795 * Constant for the "Khudawadi" Unicode character block. 2796 * @since 9 2797 */ 2798 public static final UnicodeBlock KHUDAWADI = 2799 new UnicodeBlock("KHUDAWADI"); 2800 2801 /** 2802 * Constant for the "Grantha" Unicode character block. 2803 * @since 9 2804 */ 2805 public static final UnicodeBlock GRANTHA = 2806 new UnicodeBlock("GRANTHA"); 2807 2808 /** 2809 * Constant for the "Tirhuta" Unicode character block. 2810 * @since 9 2811 */ 2812 public static final UnicodeBlock TIRHUTA = 2813 new UnicodeBlock("TIRHUTA"); 2814 2815 /** 2816 * Constant for the "Siddham" Unicode character block. 2817 * @since 9 2818 */ 2819 public static final UnicodeBlock SIDDHAM = 2820 new UnicodeBlock("SIDDHAM"); 2821 2822 /** 2823 * Constant for the "Modi" Unicode character block. 2824 * @since 9 2825 */ 2826 public static final UnicodeBlock MODI = 2827 new UnicodeBlock("MODI"); 2828 2829 /** 2830 * Constant for the "Warang Citi" Unicode character block. 2831 * @since 9 2832 */ 2833 public static final UnicodeBlock WARANG_CITI = 2834 new UnicodeBlock("WARANG_CITI", 2835 "WARANG CITI", 2836 "WARANGCITI"); 2837 2838 /** 2839 * Constant for the "Pau Cin Hau" Unicode character block. 2840 * @since 9 2841 */ 2842 public static final UnicodeBlock PAU_CIN_HAU = 2843 new UnicodeBlock("PAU_CIN_HAU", 2844 "PAU CIN HAU", 2845 "PAUCINHAU"); 2846 2847 /** 2848 * Constant for the "Mro" Unicode character block. 
2849 * @since 9 2850 */ 2851 public static final UnicodeBlock MRO = 2852 new UnicodeBlock("MRO"); 2853 2854 /** 2855 * Constant for the "Bassa Vah" Unicode character block. 2856 * @since 9 2857 */ 2858 public static final UnicodeBlock BASSA_VAH = 2859 new UnicodeBlock("BASSA_VAH", 2860 "BASSA VAH", 2861 "BASSAVAH"); 2862 2863 /** 2864 * Constant for the "Pahawh Hmong" Unicode character block. 2865 * @since 9 2866 */ 2867 public static final UnicodeBlock PAHAWH_HMONG = 2868 new UnicodeBlock("PAHAWH_HMONG", 2869 "PAHAWH HMONG", 2870 "PAHAWHHMONG"); 2871 2872 /** 2873 * Constant for the "Duployan" Unicode character block. 2874 * @since 9 2875 */ 2876 public static final UnicodeBlock DUPLOYAN = 2877 new UnicodeBlock("DUPLOYAN"); 2878 2879 /** 2880 * Constant for the "Shorthand Format Controls" Unicode character block. 2881 * @since 9 2882 */ 2883 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2884 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", 2885 "SHORTHAND FORMAT CONTROLS", 2886 "SHORTHANDFORMATCONTROLS"); 2887 2888 /** 2889 * Constant for the "Mende Kikakui" Unicode character block. 2890 * @since 9 2891 */ 2892 public static final UnicodeBlock MENDE_KIKAKUI = 2893 new UnicodeBlock("MENDE_KIKAKUI", 2894 "MENDE KIKAKUI", 2895 "MENDEKIKAKUI"); 2896 2897 /** 2898 * Constant for the "Ornamental Dingbats" Unicode character block. 2899 * @since 9 2900 */ 2901 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2902 new UnicodeBlock("ORNAMENTAL_DINGBATS", 2903 "ORNAMENTAL DINGBATS", 2904 "ORNAMENTALDINGBATS"); 2905 2906 /** 2907 * Constant for the "Geometric Shapes Extended" Unicode character block. 2908 * @since 9 2909 */ 2910 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2911 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", 2912 "GEOMETRIC SHAPES EXTENDED", 2913 "GEOMETRICSHAPESEXTENDED"); 2914 2915 /** 2916 * Constant for the "Supplemental Arrows-C" Unicode character block. 
2917 * @since 9 2918 */ 2919 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2920 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 2921 "SUPPLEMENTAL ARROWS-C", 2922 "SUPPLEMENTALARROWS-C"); 2923 2924 /** 2925 * Constant for the "Cherokee Supplement" Unicode character block. 2926 * @since 9 2927 */ 2928 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2929 new UnicodeBlock("CHEROKEE_SUPPLEMENT", 2930 "CHEROKEE SUPPLEMENT", 2931 "CHEROKEESUPPLEMENT"); 2932 2933 /** 2934 * Constant for the "Hatran" Unicode character block. 2935 * @since 9 2936 */ 2937 public static final UnicodeBlock HATRAN = 2938 new UnicodeBlock("HATRAN"); 2939 2940 /** 2941 * Constant for the "Old Hungarian" Unicode character block. 2942 * @since 9 2943 */ 2944 public static final UnicodeBlock OLD_HUNGARIAN = 2945 new UnicodeBlock("OLD_HUNGARIAN", 2946 "OLD HUNGARIAN", 2947 "OLDHUNGARIAN"); 2948 2949 /** 2950 * Constant for the "Multani" Unicode character block. 2951 * @since 9 2952 */ 2953 public static final UnicodeBlock MULTANI = 2954 new UnicodeBlock("MULTANI"); 2955 2956 /** 2957 * Constant for the "Ahom" Unicode character block. 2958 * @since 9 2959 */ 2960 public static final UnicodeBlock AHOM = 2961 new UnicodeBlock("AHOM"); 2962 2963 /** 2964 * Constant for the "Early Dynastic Cuneiform" Unicode character block. 2965 * @since 9 2966 */ 2967 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2968 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", 2969 "EARLY DYNASTIC CUNEIFORM", 2970 "EARLYDYNASTICCUNEIFORM"); 2971 2972 /** 2973 * Constant for the "Anatolian Hieroglyphs" Unicode character block. 2974 * @since 9 2975 */ 2976 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2977 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", 2978 "ANATOLIAN HIEROGLYPHS", 2979 "ANATOLIANHIEROGLYPHS"); 2980 2981 /** 2982 * Constant for the "Sutton SignWriting" Unicode character block. 
2983 * @since 9 2984 */ 2985 public static final UnicodeBlock SUTTON_SIGNWRITING = 2986 new UnicodeBlock("SUTTON_SIGNWRITING", 2987 "SUTTON SIGNWRITING", 2988 "SUTTONSIGNWRITING"); 2989 2990 /** 2991 * Constant for the "Supplemental Symbols and Pictographs" Unicode 2992 * character block. 2993 * @since 9 2994 */ 2995 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2996 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2997 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS", 2998 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS"); 2999 3000 /** 3001 * Constant for the "CJK Unified Ideographs Extension E" Unicode 3002 * character block. 3003 * @since 9 3004 */ 3005 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 3006 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 3007 "CJK UNIFIED IDEOGRAPHS EXTENSION E", 3008 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE"); 3009 3010 /** 3011 * Constant for the "Syriac Supplement" Unicode 3012 * character block. 3013 * @since 11 3014 */ 3015 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 3016 new UnicodeBlock("SYRIAC_SUPPLEMENT", 3017 "SYRIAC SUPPLEMENT", 3018 "SYRIACSUPPLEMENT"); 3019 3020 /** 3021 * Constant for the "Cyrillic Extended-C" Unicode 3022 * character block. 3023 * @since 11 3024 */ 3025 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 3026 new UnicodeBlock("CYRILLIC_EXTENDED_C", 3027 "CYRILLIC EXTENDED-C", 3028 "CYRILLICEXTENDED-C"); 3029 3030 /** 3031 * Constant for the "Osage" Unicode 3032 * character block. 3033 * @since 11 3034 */ 3035 public static final UnicodeBlock OSAGE = 3036 new UnicodeBlock("OSAGE"); 3037 3038 /** 3039 * Constant for the "Newa" Unicode 3040 * character block. 3041 * @since 11 3042 */ 3043 public static final UnicodeBlock NEWA = 3044 new UnicodeBlock("NEWA"); 3045 3046 /** 3047 * Constant for the "Mongolian Supplement" Unicode 3048 * character block. 
3049 * @since 11 3050 */ 3051 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 3052 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", 3053 "MONGOLIAN SUPPLEMENT", 3054 "MONGOLIANSUPPLEMENT"); 3055 3056 /** 3057 * Constant for the "Marchen" Unicode 3058 * character block. 3059 * @since 11 3060 */ 3061 public static final UnicodeBlock MARCHEN = 3062 new UnicodeBlock("MARCHEN"); 3063 3064 /** 3065 * Constant for the "Ideographic Symbols and Punctuation" Unicode 3066 * character block. 3067 * @since 11 3068 */ 3069 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 3070 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", 3071 "IDEOGRAPHIC SYMBOLS AND PUNCTUATION", 3072 "IDEOGRAPHICSYMBOLSANDPUNCTUATION"); 3073 3074 /** 3075 * Constant for the "Tangut" Unicode 3076 * character block. 3077 * @since 11 3078 */ 3079 public static final UnicodeBlock TANGUT = 3080 new UnicodeBlock("TANGUT"); 3081 3082 /** 3083 * Constant for the "Tangut Components" Unicode 3084 * character block. 3085 * @since 11 3086 */ 3087 public static final UnicodeBlock TANGUT_COMPONENTS = 3088 new UnicodeBlock("TANGUT_COMPONENTS", 3089 "TANGUT COMPONENTS", 3090 "TANGUTCOMPONENTS"); 3091 3092 /** 3093 * Constant for the "Kana Extended-A" Unicode 3094 * character block. 3095 * @since 11 3096 */ 3097 public static final UnicodeBlock KANA_EXTENDED_A = 3098 new UnicodeBlock("KANA_EXTENDED_A", 3099 "KANA EXTENDED-A", 3100 "KANAEXTENDED-A"); 3101 /** 3102 * Constant for the "Glagolitic Supplement" Unicode 3103 * character block. 3104 * @since 11 3105 */ 3106 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 3107 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", 3108 "GLAGOLITIC SUPPLEMENT", 3109 "GLAGOLITICSUPPLEMENT"); 3110 /** 3111 * Constant for the "Adlam" Unicode 3112 * character block. 3113 * @since 11 3114 */ 3115 public static final UnicodeBlock ADLAM = 3116 new UnicodeBlock("ADLAM"); 3117 3118 /** 3119 * Constant for the "Masaram Gondi" Unicode 3120 * character block. 
3121 * @since 11 3122 */ 3123 public static final UnicodeBlock MASARAM_GONDI = 3124 new UnicodeBlock("MASARAM_GONDI", 3125 "MASARAM GONDI", 3126 "MASARAMGONDI"); 3127 3128 /** 3129 * Constant for the "Zanabazar Square" Unicode 3130 * character block. 3131 * @since 11 3132 */ 3133 public static final UnicodeBlock ZANABAZAR_SQUARE = 3134 new UnicodeBlock("ZANABAZAR_SQUARE", 3135 "ZANABAZAR SQUARE", 3136 "ZANABAZARSQUARE"); 3137 3138 /** 3139 * Constant for the "Nushu" Unicode 3140 * character block. 3141 * @since 11 3142 */ 3143 public static final UnicodeBlock NUSHU = 3144 new UnicodeBlock("NUSHU"); 3145 3146 /** 3147 * Constant for the "Soyombo" Unicode 3148 * character block. 3149 * @since 11 3150 */ 3151 public static final UnicodeBlock SOYOMBO = 3152 new UnicodeBlock("SOYOMBO"); 3153 3154 /** 3155 * Constant for the "Bhaiksuki" Unicode 3156 * character block. 3157 * @since 11 3158 */ 3159 public static final UnicodeBlock BHAIKSUKI = 3160 new UnicodeBlock("BHAIKSUKI"); 3161 3162 /** 3163 * Constant for the "CJK Unified Ideographs Extension F" Unicode 3164 * character block. 3165 * @since 11 3166 */ 3167 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 3168 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", 3169 "CJK UNIFIED IDEOGRAPHS EXTENSION F", 3170 "CJKUNIFIEDIDEOGRAPHSEXTENSIONF"); 3171 /** 3172 * Constant for the "Georgian Extended" Unicode 3173 * character block. 3174 * @since 12 3175 */ 3176 public static final UnicodeBlock GEORGIAN_EXTENDED = 3177 new UnicodeBlock("GEORGIAN_EXTENDED", 3178 "GEORGIAN EXTENDED", 3179 "GEORGIANEXTENDED"); 3180 3181 /** 3182 * Constant for the "Hanifi Rohingya" Unicode 3183 * character block. 3184 * @since 12 3185 */ 3186 public static final UnicodeBlock HANIFI_ROHINGYA = 3187 new UnicodeBlock("HANIFI_ROHINGYA", 3188 "HANIFI ROHINGYA", 3189 "HANIFIROHINGYA"); 3190 3191 /** 3192 * Constant for the "Old Sogdian" Unicode 3193 * character block. 
         * @since 12
         */
        public static final UnicodeBlock OLD_SOGDIAN =
            new UnicodeBlock("OLD_SOGDIAN",
                             "OLD SOGDIAN",
                             "OLDSOGDIAN");

        // Declaration pattern used by the constants in this section: the first
        // constructor argument is the identifier (spelled like the field name);
        // any further arguments are alternate spellings of the block name, in
        // upper case -- usually the name with spaces, then the name with the
        // spaces removed. Single-word names pass the identifier only.
        // NOTE(review): what the constructor does with the extra arguments is
        // not visible in this part of the file -- presumably they are
        // registered as lookup aliases; confirm against the constructor.

        /**
         * Constant for the "Sogdian" Unicode
         * character block.
         * @since 12
         */
        public static final UnicodeBlock SOGDIAN =
            new UnicodeBlock("SOGDIAN");

        /**
         * Constant for the "Dogra" Unicode
         * character block.
         * @since 12
         */
        public static final UnicodeBlock DOGRA =
            new UnicodeBlock("DOGRA");

        /**
         * Constant for the "Gunjala Gondi" Unicode
         * character block.
         * @since 12
         */
        public static final UnicodeBlock GUNJALA_GONDI =
            new UnicodeBlock("GUNJALA_GONDI",
                             "GUNJALA GONDI",
                             "GUNJALAGONDI");

        /**
         * Constant for the "Makasar" Unicode
         * character block.
         * @since 12
         */
        public static final UnicodeBlock MAKASAR =
            new UnicodeBlock("MAKASAR");

        /**
         * Constant for the "Medefaidrin" Unicode
         * character block.
         * @since 12
         */
        public static final UnicodeBlock MEDEFAIDRIN =
            new UnicodeBlock("MEDEFAIDRIN");

        /**
         * Constant for the "Mayan Numerals" Unicode
         * character block.
         * @since 12
         */
        public static final UnicodeBlock MAYAN_NUMERALS =
            new UnicodeBlock("MAYAN_NUMERALS",
                             "MAYAN NUMERALS",
                             "MAYANNUMERALS");

        /**
         * Constant for the "Indic Siyaq Numbers" Unicode
         * character block.
         * @since 12
         */
        public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
            new UnicodeBlock("INDIC_SIYAQ_NUMBERS",
                             "INDIC SIYAQ NUMBERS",
                             "INDICSIYAQNUMBERS");

        /**
         * Constant for the "Chess Symbols" Unicode
         * character block.
         * @since 12
         */
        public static final UnicodeBlock CHESS_SYMBOLS =
            new UnicodeBlock("CHESS_SYMBOLS",
                             "CHESS SYMBOLS",
                             "CHESSSYMBOLS");

        /**
         * Constant for the "Elymaic" Unicode
         * character block.
         * @since 13
         */
        public static final UnicodeBlock ELYMAIC =
            new UnicodeBlock("ELYMAIC");

        /**
         * Constant for the "Nandinagari" Unicode
         * character block.
         * @since 13
         */
        public static final UnicodeBlock NANDINAGARI =
            new UnicodeBlock("NANDINAGARI");

        /**
         * Constant for the "Tamil Supplement" Unicode
         * character block.
         * @since 13
         */
        public static final UnicodeBlock TAMIL_SUPPLEMENT =
            new UnicodeBlock("TAMIL_SUPPLEMENT",
                             "TAMIL SUPPLEMENT",
                             "TAMILSUPPLEMENT");

        /**
         * Constant for the "Egyptian Hieroglyph Format Controls" Unicode
         * character block.
         * @since 13
         */
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
            new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS",
                             "EGYPTIAN HIEROGLYPH FORMAT CONTROLS",
                             "EGYPTIANHIEROGLYPHFORMATCONTROLS");

        /**
         * Constant for the "Small Kana Extension" Unicode
         * character block.
         * @since 13
         */
        public static final UnicodeBlock SMALL_KANA_EXTENSION =
            new UnicodeBlock("SMALL_KANA_EXTENSION",
                             "SMALL KANA EXTENSION",
                             "SMALLKANAEXTENSION");

        /**
         * Constant for the "Nyiakeng Puachue Hmong" Unicode
         * character block.
         * @since 13
         */
        public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
            new UnicodeBlock("NYIAKENG_PUACHUE_HMONG",
                             "NYIAKENG PUACHUE HMONG",
                             "NYIAKENGPUACHUEHMONG");

        /**
         * Constant for the "Wancho" Unicode
         * character block.
         * @since 13
         */
        public static final UnicodeBlock WANCHO =
            new UnicodeBlock("WANCHO");

        /**
         * Constant for the "Ottoman Siyaq Numbers" Unicode
         * character block.
         * @since 13
         */
        public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
            new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS",
                             "OTTOMAN SIYAQ NUMBERS",
                             "OTTOMANSIYAQNUMBERS");

        /**
         * Constant for the "Symbols and Pictographs Extended-A" Unicode
         * character block.
         * @since 13
         */
        public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
            new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A",
                             "SYMBOLS AND PICTOGRAPHS EXTENDED-A",
                             "SYMBOLSANDPICTOGRAPHSEXTENDED-A");

        /**
         * Constant for the "Yezidi" Unicode
         * character block.
         * @since 15
         */
        public static final UnicodeBlock YEZIDI =
            new UnicodeBlock("YEZIDI");

        /**
         * Constant for the "Chorasmian" Unicode
         * character block.
         * @since 15
         */
        public static final UnicodeBlock CHORASMIAN =
            new UnicodeBlock("CHORASMIAN");

        /**
         * Constant for the "Dives Akuru" Unicode
         * character block.
         * @since 15
         */
        public static final UnicodeBlock DIVES_AKURU =
            new UnicodeBlock("DIVES_AKURU",
                             "DIVES AKURU",
                             "DIVESAKURU");

        /**
         * Constant for the "Lisu Supplement" Unicode
         * character block.
         * @since 15
         */
        public static final UnicodeBlock LISU_SUPPLEMENT =
            new UnicodeBlock("LISU_SUPPLEMENT",
                             "LISU SUPPLEMENT",
                             "LISUSUPPLEMENT");

        /**
         * Constant for the "Khitan Small Script" Unicode
         * character block.
         * @since 15
         */
        public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
            new UnicodeBlock("KHITAN_SMALL_SCRIPT",
                             "KHITAN SMALL SCRIPT",
                             "KHITANSMALLSCRIPT");

        /**
         * Constant for the "Tangut Supplement" Unicode
         * character block.
         * @since 15
         */
        public static final UnicodeBlock TANGUT_SUPPLEMENT =
            new UnicodeBlock("TANGUT_SUPPLEMENT",
                             "TANGUT SUPPLEMENT",
                             "TANGUTSUPPLEMENT");

        /**
         * Constant for the "Symbols for Legacy Computing" Unicode
         * character block.
         * @since 15
         */
        public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
            new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING",
                             "SYMBOLS FOR LEGACY COMPUTING",
                             "SYMBOLSFORLEGACYCOMPUTING");

        /**
         * Constant for the "CJK Unified Ideographs Extension G" Unicode
         * character block.
         * @since 15
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION G",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONG");

        /**
         * Constant for the "Arabic Extended-B" Unicode
         * character block.
         * @since 19
         */
        public static final UnicodeBlock ARABIC_EXTENDED_B =
            new UnicodeBlock("ARABIC_EXTENDED_B",
                             "ARABIC EXTENDED-B",
                             "ARABICEXTENDED-B");

        /**
         * Constant for the "Vithkuqi" Unicode
         * character block.
         * @since 19
         */
        public static final UnicodeBlock VITHKUQI =
            new UnicodeBlock("VITHKUQI");

        /**
         * Constant for the "Latin Extended-F" Unicode
         * character block.
         * @since 19
         */
        public static final UnicodeBlock LATIN_EXTENDED_F =
            new UnicodeBlock("LATIN_EXTENDED_F",
                             "LATIN EXTENDED-F",
                             "LATINEXTENDED-F");

        /**
         * Constant for the "Old Uyghur" Unicode
         * character block.
         * @since 19
         */
        public static final UnicodeBlock OLD_UYGHUR =
            new UnicodeBlock("OLD_UYGHUR",
                             "OLD UYGHUR",
                             "OLDUYGHUR");

        /**
         * Constant for the "Unified Canadian Aboriginal Syllabics Extended-A" Unicode
         * character block.
         * @since 19
         */
        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A =
            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A",
                             "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED-A",
                             "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED-A");

        // NOTE(review): the two alternate spellings passed for CYPRO_MINOAN
        // below are identical (the name contains no space to remove), whereas
        // TULU_TIGALARI passes its hyphenated name only once. The repeated
        // argument looks redundant -- confirm against the constructor before
        // removing it.

        /**
         * Constant for the "Cypro-Minoan" Unicode
         * character block.
         * @since 19
         */
        public static final UnicodeBlock CYPRO_MINOAN =
            new UnicodeBlock("CYPRO_MINOAN",
                             "CYPRO-MINOAN",
                             "CYPRO-MINOAN");

        /**
         * Constant for the "Tangsa" Unicode
         * character block.
         * @since 19
         */
        public static final UnicodeBlock TANGSA =
            new UnicodeBlock("TANGSA");

        /**
         * Constant for the "Kana Extended-B" Unicode
         * character block.
         * @since 19
         */
        public static final UnicodeBlock KANA_EXTENDED_B =
            new UnicodeBlock("KANA_EXTENDED_B",
                             "KANA EXTENDED-B",
                             "KANAEXTENDED-B");

        /**
         * Constant for the "Znamenny Musical Notation" Unicode
         * character block.
         * @since 19
         */
        public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION =
            new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION",
                             "ZNAMENNY MUSICAL NOTATION",
                             "ZNAMENNYMUSICALNOTATION");

        /**
         * Constant for the "Latin Extended-G" Unicode
         * character block.
         * @since 19
         */
        public static final UnicodeBlock LATIN_EXTENDED_G =
            new UnicodeBlock("LATIN_EXTENDED_G",
                             "LATIN EXTENDED-G",
                             "LATINEXTENDED-G");

        /**
         * Constant for the "Toto" Unicode
         * character block.
         * @since 19
         */
        public static final UnicodeBlock TOTO =
            new UnicodeBlock("TOTO");

        /**
         * Constant for the "Ethiopic Extended-B" Unicode
         * character block.
         * @since 19
         */
        public static final UnicodeBlock ETHIOPIC_EXTENDED_B =
            new UnicodeBlock("ETHIOPIC_EXTENDED_B",
                             "ETHIOPIC EXTENDED-B",
                             "ETHIOPICEXTENDED-B");

        /**
         * Constant for the "Arabic Extended-C" Unicode
         * character block.
         * @since 20
         */
        public static final UnicodeBlock ARABIC_EXTENDED_C =
            new UnicodeBlock("ARABIC_EXTENDED_C",
                             "ARABIC EXTENDED-C",
                             "ARABICEXTENDED-C");

        /**
         * Constant for the "Devanagari Extended-A" Unicode
         * character block.
         * @since 20
         */
        public static final UnicodeBlock DEVANAGARI_EXTENDED_A =
            new UnicodeBlock("DEVANAGARI_EXTENDED_A",
                             "DEVANAGARI EXTENDED-A",
                             "DEVANAGARIEXTENDED-A");

        /**
         * Constant for the "Kawi" Unicode
         * character block.
         * @since 20
         */
        public static final UnicodeBlock KAWI =
            new UnicodeBlock("KAWI");

        /**
         * Constant for the "Kaktovik Numerals" Unicode
         * character block.
         * @since 20
         */
        public static final UnicodeBlock KAKTOVIK_NUMERALS =
            new UnicodeBlock("KAKTOVIK_NUMERALS",
                             "KAKTOVIK NUMERALS",
                             "KAKTOVIKNUMERALS");

        /**
         * Constant for the "Cyrillic Extended-D" Unicode
         * character block.
         * @since 20
         */
        public static final UnicodeBlock CYRILLIC_EXTENDED_D =
            new UnicodeBlock("CYRILLIC_EXTENDED_D",
                             "CYRILLIC EXTENDED-D",
                             "CYRILLICEXTENDED-D");

        /**
         * Constant for the "Nag Mundari" Unicode
         * character block.
         * @since 20
         */
        public static final UnicodeBlock NAG_MUNDARI =
            new UnicodeBlock("NAG_MUNDARI",
                             "NAG MUNDARI",
                             "NAGMUNDARI");

        /**
         * Constant for the "CJK Unified Ideographs Extension H" Unicode
         * character block.
         * @since 20
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION H",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONH");

        /**
         * Constant for the "CJK Unified Ideographs Extension I" Unicode
         * character block.
         * @since 22
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION I",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONI");

        /**
         * Constant for the "Todhri" Unicode
         * character block.
         * @since 24
         */
        public static final UnicodeBlock TODHRI =
            new UnicodeBlock("TODHRI");

        /**
         * Constant for the "Garay" Unicode
         * character block.
         * @since 24
         */
        public static final UnicodeBlock GARAY =
            new UnicodeBlock("GARAY");

        /**
         * Constant for the "Tulu-Tigalari" Unicode
         * character block.
         * @since 24
         */
        public static final UnicodeBlock TULU_TIGALARI =
            new UnicodeBlock("TULU_TIGALARI",
                             "TULU-TIGALARI");

        /**
         * Constant for the "Myanmar Extended-C" Unicode
         * character block.
         * @since 24
         */
        public static final UnicodeBlock MYANMAR_EXTENDED_C =
            new UnicodeBlock("MYANMAR_EXTENDED_C",
                             "MYANMAR EXTENDED-C",
                             "MYANMAREXTENDED-C");

        /**
         * Constant for the "Sunuwar" Unicode
         * character block.
         * @since 24
         */
        public static final UnicodeBlock SUNUWAR =
            new UnicodeBlock("SUNUWAR");

        /**
         * Constant for the "Egyptian Hieroglyphs Extended-A" Unicode
         * character block.
         * @since 24
         */
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS_EXTENDED_A =
            new UnicodeBlock("EGYPTIAN_HIEROGLYPHS_EXTENDED_A",
                             "EGYPTIAN HIEROGLYPHS EXTENDED-A",
                             "EGYPTIANHIEROGLYPHSEXTENDED-A");

        /**
         * Constant for the "Gurung Khema" Unicode
         * character block.
         * @since 24
         */
        public static final UnicodeBlock GURUNG_KHEMA =
            new UnicodeBlock("GURUNG_KHEMA",
                             "GURUNG KHEMA",
                             "GURUNGKHEMA");

        /**
         * Constant for the "Kirat Rai" Unicode
         * character block.
         * @since 24
         */
        public static final UnicodeBlock KIRAT_RAI =
            new UnicodeBlock("KIRAT_RAI",
                             "KIRAT RAI",
                             "KIRATRAI");

        /**
         * Constant for the "Symbols for Legacy Computing Supplement" Unicode
         * character block.
         * @since 24
         */
        public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT =
            new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT",
                             "SYMBOLS FOR LEGACY COMPUTING SUPPLEMENT",
                             "SYMBOLSFORLEGACYCOMPUTINGSUPPLEMENT");

        /**
         * Constant for the "Ol Onal" Unicode
         * character block.
         * @since 24
         */
        public static final UnicodeBlock OL_ONAL =
            new UnicodeBlock("OL_ONAL",
                             "OL ONAL",
                             "OLONAL");

        // Range table. blockStarts[i] is the first code point of range i, and
        // the range runs up to (but not including) blockStarts[i + 1]; the
        // last range ends at 0x10FFFF. Values are in strictly ascending order.
        // Gaps between defined blocks get their own entry, commented
        // "unassigned", so the ranges cover 0x0000..0x10FFFF without holes.
        //
        // This array is parallel to blocks[] below: both must keep the same
        // length and order. Adding, removing or splitting a range here needs
        // the matching change at the same index in blocks[].
        // NOTE(review): the code that searches this table is not visible in
        // this part of the file -- presumably the of(...) lookup methods.
        private static final int[] blockStarts = {
            0x0000,   // 0000..007F; Basic Latin
            0x0080,   // 0080..00FF; Latin-1 Supplement
            0x0100,   // 0100..017F; Latin Extended-A
            0x0180,   // 0180..024F; Latin Extended-B
            0x0250,   // 0250..02AF; IPA Extensions
            0x02B0,   // 02B0..02FF; Spacing Modifier Letters
            0x0300,   // 0300..036F; Combining Diacritical Marks
            0x0370,   // 0370..03FF; Greek and Coptic
            0x0400,   // 0400..04FF; Cyrillic
            0x0500,   // 0500..052F; Cyrillic Supplement
            0x0530,   // 0530..058F; Armenian
            0x0590,   // 0590..05FF; Hebrew
            0x0600,   // 0600..06FF; Arabic
            0x0700,   // 0700..074F; Syriac
            0x0750,   // 0750..077F; Arabic Supplement
            0x0780,   // 0780..07BF; Thaana
            0x07C0,   // 07C0..07FF; NKo
            0x0800,   // 0800..083F; Samaritan
            0x0840,   // 0840..085F; Mandaic
            0x0860,   // 0860..086F; Syriac Supplement
            0x0870,   // 0870..089F; Arabic Extended-B
            0x08A0,   // 08A0..08FF; Arabic Extended-A
            0x0900,   // 0900..097F; Devanagari
            0x0980,   // 0980..09FF; Bengali
            0x0A00,   // 0A00..0A7F; Gurmukhi
            0x0A80,   // 0A80..0AFF; Gujarati
            0x0B00,   // 0B00..0B7F; Oriya
            0x0B80,   // 0B80..0BFF; Tamil
            0x0C00,   // 0C00..0C7F; Telugu
            0x0C80,   // 0C80..0CFF; Kannada
            0x0D00,   // 0D00..0D7F; Malayalam
            0x0D80,   // 0D80..0DFF; Sinhala
            0x0E00,   // 0E00..0E7F; Thai
            0x0E80,   // 0E80..0EFF; Lao
            0x0F00,   // 0F00..0FFF; Tibetan
            0x1000,   // 1000..109F; Myanmar
            0x10A0,   // 10A0..10FF; Georgian
            0x1100,   // 1100..11FF; Hangul Jamo
            0x1200,   // 1200..137F; Ethiopic
            0x1380,   // 1380..139F; Ethiopic Supplement
            0x13A0,   // 13A0..13FF; Cherokee
            0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
            0x1680,   // 1680..169F; Ogham
            0x16A0,   // 16A0..16FF; Runic
            0x1700,   // 1700..171F; Tagalog
            0x1720,   // 1720..173F; Hanunoo
            0x1740,   // 1740..175F; Buhid
            0x1760,   // 1760..177F; Tagbanwa
            0x1780,   // 1780..17FF; Khmer
            0x1800,   // 1800..18AF; Mongolian
            0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
            0x1900,   // 1900..194F; Limbu
            0x1950,   // 1950..197F; Tai Le
            0x1980,   // 1980..19DF; New Tai Lue
            0x19E0,   // 19E0..19FF; Khmer Symbols
            0x1A00,   // 1A00..1A1F; Buginese
            0x1A20,   // 1A20..1AAF; Tai Tham
            0x1AB0,   // 1AB0..1AFF; Combining Diacritical Marks Extended
            0x1B00,   // 1B00..1B7F; Balinese
            0x1B80,   // 1B80..1BBF; Sundanese
            0x1BC0,   // 1BC0..1BFF; Batak
            0x1C00,   // 1C00..1C4F; Lepcha
            0x1C50,   // 1C50..1C7F; Ol Chiki
            0x1C80,   // 1C80..1C8F; Cyrillic Extended-C
            0x1C90,   // 1C90..1CBF; Georgian Extended
            0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
            0x1CD0,   // 1CD0..1CFF; Vedic Extensions
            0x1D00,   // 1D00..1D7F; Phonetic Extensions
            0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
            0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
            0x1E00,   // 1E00..1EFF; Latin Extended Additional
            0x1F00,   // 1F00..1FFF; Greek Extended
            0x2000,   // 2000..206F; General Punctuation
            0x2070,   // 2070..209F; Superscripts and Subscripts
            0x20A0,   // 20A0..20CF; Currency Symbols
            0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
            0x2100,   // 2100..214F; Letterlike Symbols
            0x2150,   // 2150..218F; Number Forms
            0x2190,   // 2190..21FF; Arrows
            0x2200,   // 2200..22FF; Mathematical Operators
            0x2300,   // 2300..23FF; Miscellaneous Technical
            0x2400,   // 2400..243F; Control Pictures
            0x2440,   // 2440..245F; Optical Character Recognition
            0x2460,   // 2460..24FF; Enclosed Alphanumerics
            0x2500,   // 2500..257F; Box Drawing
            0x2580,   // 2580..259F; Block Elements
            0x25A0,   // 25A0..25FF; Geometric Shapes
            0x2600,   // 2600..26FF; Miscellaneous Symbols
            0x2700,   // 2700..27BF; Dingbats
            0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
            0x27F0,   // 27F0..27FF; Supplemental Arrows-A
            0x2800,   // 2800..28FF; Braille Patterns
            0x2900,   // 2900..297F; Supplemental Arrows-B
            0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
            0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
            0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
            0x2C00,   // 2C00..2C5F; Glagolitic
            0x2C60,   // 2C60..2C7F; Latin Extended-C
            0x2C80,   // 2C80..2CFF; Coptic
            0x2D00,   // 2D00..2D2F; Georgian Supplement
            0x2D30,   // 2D30..2D7F; Tifinagh
            0x2D80,   // 2D80..2DDF; Ethiopic Extended
            0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
            0x2E00,   // 2E00..2E7F; Supplemental Punctuation
            0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
            0x2F00,   // 2F00..2FDF; Kangxi Radicals
            0x2FE0,   // unassigned
            0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
            0x3000,   // 3000..303F; CJK Symbols and Punctuation
            0x3040,   // 3040..309F; Hiragana
            0x30A0,   // 30A0..30FF; Katakana
            0x3100,   // 3100..312F; Bopomofo
            0x3130,   // 3130..318F; Hangul Compatibility Jamo
            0x3190,   // 3190..319F; Kanbun
            0x31A0,   // 31A0..31BF; Bopomofo Extended
            0x31C0,   // 31C0..31EF; CJK Strokes
            0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
            0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
            0x3300,   // 3300..33FF; CJK Compatibility
            0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
            0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
            0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
            0xA000,   // A000..A48F; Yi Syllables
            0xA490,   // A490..A4CF; Yi Radicals
            0xA4D0,   // A4D0..A4FF; Lisu
            0xA500,   // A500..A63F; Vai
            0xA640,   // A640..A69F; Cyrillic Extended-B
            0xA6A0,   // A6A0..A6FF; Bamum
            0xA700,   // A700..A71F; Modifier Tone Letters
            0xA720,   // A720..A7FF; Latin Extended-D
            0xA800,   // A800..A82F; Syloti Nagri
            0xA830,   // A830..A83F; Common Indic Number Forms
            0xA840,   // A840..A87F; Phags-pa
            0xA880,   // A880..A8DF; Saurashtra
            0xA8E0,   // A8E0..A8FF; Devanagari Extended
            0xA900,   // A900..A92F; Kayah Li
            0xA930,   // A930..A95F; Rejang
            0xA960,   // A960..A97F; Hangul Jamo Extended-A
            0xA980,   // A980..A9DF; Javanese
            0xA9E0,   // A9E0..A9FF; Myanmar Extended-B
            0xAA00,   // AA00..AA5F; Cham
            0xAA60,   // AA60..AA7F; Myanmar Extended-A
            0xAA80,   // AA80..AADF; Tai Viet
            0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
            0xAB00,   // AB00..AB2F; Ethiopic Extended-A
            0xAB30,   // AB30..AB6F; Latin Extended-E
            0xAB70,   // AB70..ABBF; Cherokee Supplement
            0xABC0,   // ABC0..ABFF; Meetei Mayek
            0xAC00,   // AC00..D7AF; Hangul Syllables
            0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
            0xD800,   // D800..DB7F; High Surrogates
            0xDB80,   // DB80..DBFF; High Private Use Surrogates
            0xDC00,   // DC00..DFFF; Low Surrogates
            0xE000,   // E000..F8FF; Private Use Area
            0xF900,   // F900..FAFF; CJK Compatibility Ideographs
            0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
            0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
            0xFE00,   // FE00..FE0F; Variation Selectors
            0xFE10,   // FE10..FE1F; Vertical Forms
            0xFE20,   // FE20..FE2F; Combining Half Marks
            0xFE30,   // FE30..FE4F; CJK Compatibility Forms
            0xFE50,   // FE50..FE6F; Small Form Variants
            0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
            0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
            0xFFF0,   // FFF0..FFFF; Specials
            0x10000,  // 10000..1007F; Linear B Syllabary
            0x10080,  // 10080..100FF; Linear B Ideograms
            0x10100,  // 10100..1013F; Aegean Numbers
            0x10140,  // 10140..1018F; Ancient Greek Numbers
            0x10190,  // 10190..101CF; Ancient Symbols
            0x101D0,  // 101D0..101FF; Phaistos Disc
            0x10200,  // unassigned
            0x10280,  // 10280..1029F; Lycian
            0x102A0,  // 102A0..102DF; Carian
            0x102E0,  // 102E0..102FF; Coptic Epact Numbers
            0x10300,  // 10300..1032F; Old Italic
            0x10330,  // 10330..1034F; Gothic
            0x10350,  // 10350..1037F; Old Permic
            0x10380,  // 10380..1039F; Ugaritic
            0x103A0,  // 103A0..103DF; Old Persian
            0x103E0,  // unassigned
            0x10400,  // 10400..1044F; Deseret
            0x10450,  // 10450..1047F; Shavian
            0x10480,  // 10480..104AF; Osmanya
            0x104B0,  // 104B0..104FF; Osage
            0x10500,  // 10500..1052F; Elbasan
            0x10530,  // 10530..1056F; Caucasian Albanian
            0x10570,  // 10570..105BF; Vithkuqi
            0x105C0,  // 105C0..105FF; Todhri
            0x10600,  // 10600..1077F; Linear A
            0x10780,  // 10780..107BF; Latin Extended-F
            0x107C0,  // unassigned
            0x10800,  // 10800..1083F; Cypriot Syllabary
            0x10840,  // 10840..1085F; Imperial Aramaic
            0x10860,  // 10860..1087F; Palmyrene
            0x10880,  // 10880..108AF; Nabataean
            0x108B0,  // unassigned
            0x108E0,  // 108E0..108FF; Hatran
            0x10900,  // 10900..1091F; Phoenician
            0x10920,  // 10920..1093F; Lydian
            0x10940,  // unassigned
            0x10980,  // 10980..1099F; Meroitic Hieroglyphs
            0x109A0,  // 109A0..109FF; Meroitic Cursive
            0x10A00,  // 10A00..10A5F; Kharoshthi
            0x10A60,  // 10A60..10A7F; Old South Arabian
            0x10A80,  // 10A80..10A9F; Old North Arabian
            0x10AA0,  // unassigned
            0x10AC0,  // 10AC0..10AFF; Manichaean
            0x10B00,  // 10B00..10B3F; Avestan
            0x10B40,  // 10B40..10B5F; Inscriptional Parthian
            0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
            0x10B80,  // 10B80..10BAF; Psalter Pahlavi
            0x10BB0,  // unassigned
            0x10C00,  // 10C00..10C4F; Old Turkic
            0x10C50,  // unassigned
            0x10C80,  // 10C80..10CFF; Old Hungarian
            0x10D00,  // 10D00..10D3F; Hanifi Rohingya
            0x10D40,  // 10D40..10D8F; Garay
            0x10D90,  // unassigned
            0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
            0x10E80,  // 10E80..10EBF; Yezidi
            0x10EC0,  // 10EC0..10EFF; Arabic Extended-C
            0x10F00,  // 10F00..10F2F; Old Sogdian
            0x10F30,  // 10F30..10F6F; Sogdian
            0x10F70,  // 10F70..10FAF; Old Uyghur
            0x10FB0,  // 10FB0..10FDF; Chorasmian
            0x10FE0,  // 10FE0..10FFF; Elymaic
            0x11000,  // 11000..1107F; Brahmi
            0x11080,  // 11080..110CF; Kaithi
            0x110D0,  // 110D0..110FF; Sora Sompeng
            0x11100,  // 11100..1114F; Chakma
            0x11150,  // 11150..1117F; Mahajani
            0x11180,  // 11180..111DF; Sharada
            0x111E0,  // 111E0..111FF; Sinhala Archaic Numbers
            0x11200,  // 11200..1124F; Khojki
            0x11250,  // unassigned
            0x11280,  // 11280..112AF; Multani
            0x112B0,  // 112B0..112FF; Khudawadi
            0x11300,  // 11300..1137F; Grantha
            0x11380,  // 11380..113FF; Tulu-Tigalari
            0x11400,  // 11400..1147F; Newa
            0x11480,  // 11480..114DF; Tirhuta
            0x114E0,  // unassigned
            0x11580,  // 11580..115FF; Siddham
            0x11600,  // 11600..1165F; Modi
            0x11660,  // 11660..1167F; Mongolian Supplement
            0x11680,  // 11680..116CF; Takri
            0x116D0,  // 116D0..116FF; Myanmar Extended-C
            0x11700,  // 11700..1174F; Ahom
            0x11750,  // unassigned
            0x11800,  // 11800..1184F; Dogra
            0x11850,  // unassigned
            0x118A0,  // 118A0..118FF; Warang Citi
            0x11900,  // 11900..1195F; Dives Akuru
            0x11960,  // unassigned
            0x119A0,  // 119A0..119FF; Nandinagari
            0x11A00,  // 11A00..11A4F; Zanabazar Square
            0x11A50,  // 11A50..11AAF; Soyombo
            0x11AB0,  // 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
            0x11AC0,  // 11AC0..11AFF; Pau Cin Hau
            0x11B00,  // 11B00..11B5F; Devanagari Extended-A
            0x11B60,  // unassigned
            0x11BC0,  // 11BC0..11BFF; Sunuwar
            0x11C00,  // 11C00..11C6F; Bhaiksuki
            0x11C70,  // 11C70..11CBF; Marchen
            0x11CC0,  // unassigned
            0x11D00,  // 11D00..11D5F; Masaram Gondi
            0x11D60,  // 11D60..11DAF; Gunjala Gondi
            0x11DB0,  // unassigned
            0x11EE0,  // 11EE0..11EFF; Makasar
            0x11F00,  // 11F00..11F5F; Kawi
            0x11F60,  // unassigned
            0x11FB0,  // 11FB0..11FBF; Lisu Supplement
            0x11FC0,  // 11FC0..11FFF; Tamil Supplement
            0x12000,  // 12000..123FF; Cuneiform
            0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
            0x12480,  // 12480..1254F; Early Dynastic Cuneiform
            0x12550,  // unassigned
            0x12F90,  // 12F90..12FFF; Cypro-Minoan
            0x13000,  // 13000..1342F; Egyptian Hieroglyphs
            0x13430,  // 13430..1345F; Egyptian Hieroglyph Format Controls
            0x13460,  // 13460..143FF; Egyptian Hieroglyphs Extended-A
            0x14400,  // 14400..1467F; Anatolian Hieroglyphs
            0x14680,  // unassigned
            0x16100,  // 16100..1613F; Gurung Khema
            0x16140,  // unassigned
            0x16800,  // 16800..16A3F; Bamum Supplement
            0x16A40,  // 16A40..16A6F; Mro
            0x16A70,  // 16A70..16ACF; Tangsa
            0x16AD0,  // 16AD0..16AFF; Bassa Vah
            0x16B00,  // 16B00..16B8F; Pahawh Hmong
            0x16B90,  // unassigned
            0x16D40,  // 16D40..16D7F; Kirat Rai
            0x16D80,  // unassigned
            0x16E40,  // 16E40..16E9F; Medefaidrin
            0x16EA0,  // unassigned
            0x16F00,  // 16F00..16F9F; Miao
            0x16FA0,  // unassigned
            0x16FE0,  // 16FE0..16FFF; Ideographic Symbols and Punctuation
            0x17000,  // 17000..187FF; Tangut
            0x18800,  // 18800..18AFF; Tangut Components
            0x18B00,  // 18B00..18CFF; Khitan Small Script
            0x18D00,  // 18D00..18D7F; Tangut Supplement
            0x18D80,  // unassigned
            0x1AFF0,  // 1AFF0..1AFFF; Kana Extended-B
            0x1B000,  // 1B000..1B0FF; Kana Supplement
            0x1B100,  // 1B100..1B12F; Kana Extended-A
            0x1B130,  // 1B130..1B16F; Small Kana Extension
            0x1B170,  // 1B170..1B2FF; Nushu
            0x1B300,  // unassigned
            0x1BC00,  // 1BC00..1BC9F; Duployan
            0x1BCA0,  // 1BCA0..1BCAF; Shorthand Format Controls
            0x1BCB0,  // unassigned
            0x1CC00,  // 1CC00..1CEBF; Symbols for Legacy Computing Supplement
            0x1CEC0,  // unassigned
            0x1CF00,  // 1CF00..1CFCF; Znamenny Musical Notation
            0x1CFD0,  // unassigned
            0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
            0x1D100,  // 1D100..1D1FF; Musical Symbols
            0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
            0x1D250,  // unassigned
            0x1D2C0,  // 1D2C0..1D2DF; Kaktovik Numerals
            0x1D2E0,  // 1D2E0..1D2FF; Mayan Numerals
            0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
            0x1D360,  // 1D360..1D37F; Counting Rod Numerals
            0x1D380,  // unassigned
            0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
            0x1D800,  // 1D800..1DAAF; Sutton SignWriting
            0x1DAB0,  // unassigned
            0x1DF00,  // 1DF00..1DFFF; Latin Extended-G
            0x1E000,  // 1E000..1E02F; Glagolitic Supplement
            0x1E030,  // 1E030..1E08F; Cyrillic Extended-D
            0x1E090,  // unassigned
            0x1E100,  // 1E100..1E14F; Nyiakeng Puachue Hmong
            0x1E150,  // unassigned
            0x1E290,  // 1E290..1E2BF; Toto
            0x1E2C0,  // 1E2C0..1E2FF; Wancho
            0x1E300,  // unassigned
            0x1E4D0,  // 1E4D0..1E4FF; Nag Mundari
            0x1E500,  // unassigned
            0x1E5D0,  // 1E5D0..1E5FF; Ol Onal
            0x1E600,  // unassigned
            0x1E7E0,  // 1E7E0..1E7FF; Ethiopic Extended-B
            0x1E800,  // 1E800..1E8DF; Mende Kikakui
            0x1E8E0,  // unassigned
            0x1E900,  // 1E900..1E95F; Adlam
            0x1E960,  // unassigned
            0x1EC70,  // 1EC70..1ECBF; Indic Siyaq Numbers
            0x1ECC0,  // unassigned
            0x1ED00,  // 1ED00..1ED4F; Ottoman Siyaq Numbers
            0x1ED50,  // unassigned
            0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
            0x1EF00,  // unassigned
            0x1F000,  // 1F000..1F02F; Mahjong Tiles
            0x1F030,  // 1F030..1F09F; Domino Tiles
            0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
            0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
            0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
            0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
            0x1F600,  // 1F600..1F64F; Emoticons
            0x1F650,  // 1F650..1F67F; Ornamental Dingbats
            0x1F680,  // 1F680..1F6FF; Transport and Map Symbols
            0x1F700,  // 1F700..1F77F; Alchemical Symbols
            0x1F780,  // 1F780..1F7FF; Geometric Shapes Extended
            0x1F800,  // 1F800..1F8FF; Supplemental Arrows-C
            0x1F900,  // 1F900..1F9FF; Supplemental Symbols and Pictographs
            0x1FA00,  // 1FA00..1FA6F; Chess Symbols
            0x1FA70,  // 1FA70..1FAFF; Symbols and Pictographs Extended-A
            0x1FB00,  // 1FB00..1FBFF; Symbols for Legacy Computing
            0x1FC00,  // unassigned
            0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
            0x2A6E0,  // unassigned
            0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
            0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
            0x2B820,  // 2B820..2CEAF; CJK Unified Ideographs Extension E
            0x2CEB0,  // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
            0x2EBF0,  // 2EBF0..2EE5F; CJK Unified Ideographs Extension I
            0x2EE60,  // unassigned
            0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
            0x2FA20,  // unassigned
            0x30000,  // 30000..3134F; CJK Unified Ideographs Extension G
            0x31350,  // 31350..323AF; CJK Unified Ideographs Extension H
            0x323B0,  // unassigned
            0xE0000,  // E0000..E007F; Tags
            0xE0080,  // unassigned
            0xE0100,  // E0100..E01EF; Variation Selectors Supplement
            0xE01F0,  // unassigned
            0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
            0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
        };

        // Block constant for each range in blockStarts, index for index:
        // blocks[i] is the block whose range starts at blockStarts[i].
        // A null entry corresponds to a range that blockStarts comments as
        // "unassigned", i.e. code points that belong to no defined block.
        // Must stay the same length and order as blockStarts.
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            SYRIAC_SUPPLEMENT,
            ARABIC_EXTENDED_B,
            ARABIC_EXTENDED_A,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            TAI_THAM,
            COMBINING_DIACRITICAL_MARKS_EXTENDED,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            CYRILLIC_EXTENDED_C,
            GEORGIAN_EXTENDED,
            SUNDANESE_SUPPLEMENT,
            VEDIC_EXTENSIONS,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
            LATIN_EXTENDED_ADDITIONAL,
            GREEK_EXTENDED,
            GENERAL_PUNCTUATION,
            SUPERSCRIPTS_AND_SUBSCRIPTS,
            CURRENCY_SYMBOLS,
            COMBINING_MARKS_FOR_SYMBOLS,
            LETTERLIKE_SYMBOLS,
            NUMBER_FORMS,
            ARROWS,
            MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_TECHNICAL,
            CONTROL_PICTURES,
            OPTICAL_CHARACTER_RECOGNITION,
            ENCLOSED_ALPHANUMERICS,
            BOX_DRAWING,
            BLOCK_ELEMENTS,
            GEOMETRIC_SHAPES,
            MISCELLANEOUS_SYMBOLS,
            DINGBATS,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
            SUPPLEMENTAL_ARROWS_A,
            BRAILLE_PATTERNS,
            SUPPLEMENTAL_ARROWS_B,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
            GLAGOLITIC,
            LATIN_EXTENDED_C,
            COPTIC,
            GEORGIAN_SUPPLEMENT,
            TIFINAGH,
            ETHIOPIC_EXTENDED,
            CYRILLIC_EXTENDED_A,
            SUPPLEMENTAL_PUNCTUATION,
            CJK_RADICALS_SUPPLEMENT,
            KANGXI_RADICALS,
            null,
            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
            CJK_SYMBOLS_AND_PUNCTUATION,
            HIRAGANA,
            KATAKANA,
            BOPOMOFO,
            HANGUL_COMPATIBILITY_JAMO,
            KANBUN,
            BOPOMOFO_EXTENDED,
            CJK_STROKES,
            KATAKANA_PHONETIC_EXTENSIONS,
            ENCLOSED_CJK_LETTERS_AND_MONTHS,
            CJK_COMPATIBILITY,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
            YIJING_HEXAGRAM_SYMBOLS,
            CJK_UNIFIED_IDEOGRAPHS,
            YI_SYLLABLES,
            YI_RADICALS,
            LISU,
            VAI,
            CYRILLIC_EXTENDED_B,
            BAMUM,
            MODIFIER_TONE_LETTERS,
            LATIN_EXTENDED_D,
            SYLOTI_NAGRI,
            COMMON_INDIC_NUMBER_FORMS,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI_EXTENDED,
            KAYAH_LI,
            REJANG,
            HANGUL_JAMO_EXTENDED_A,
            JAVANESE,
            MYANMAR_EXTENDED_B,
            CHAM,
            MYANMAR_EXTENDED_A,
            TAI_VIET,
            MEETEI_MAYEK_EXTENSIONS,
            ETHIOPIC_EXTENDED_A,
            LATIN_EXTENDED_E,
            CHEROKEE_SUPPLEMENT,
            MEETEI_MAYEK,
            HANGUL_SYLLABLES,
            HANGUL_JAMO_EXTENDED_B,
            HIGH_SURROGATES,
            HIGH_PRIVATE_USE_SURROGATES,
            LOW_SURROGATES,
            PRIVATE_USE_AREA,
            CJK_COMPATIBILITY_IDEOGRAPHS,
            ALPHABETIC_PRESENTATION_FORMS,
            ARABIC_PRESENTATION_FORMS_A,
            VARIATION_SELECTORS,
            VERTICAL_FORMS,
            COMBINING_HALF_MARKS,
            CJK_COMPATIBILITY_FORMS,
            SMALL_FORM_VARIANTS,
            ARABIC_PRESENTATION_FORMS_B,
            HALFWIDTH_AND_FULLWIDTH_FORMS,
            SPECIALS,
            // Entries from here on pair with blockStarts values of 0x10000
            // and above.
            LINEAR_B_SYLLABARY,
            LINEAR_B_IDEOGRAMS,
            AEGEAN_NUMBERS,
            ANCIENT_GREEK_NUMBERS,
            ANCIENT_SYMBOLS,
            PHAISTOS_DISC,
            null,
            LYCIAN,
            CARIAN,
            COPTIC_EPACT_NUMBERS,
            OLD_ITALIC,
            GOTHIC,
            OLD_PERMIC,
            UGARITIC,
            OLD_PERSIAN,
            null,
            DESERET,
            SHAVIAN,
            OSMANYA,
            OSAGE,
            ELBASAN,
            CAUCASIAN_ALBANIAN,
            VITHKUQI,
            TODHRI,
            LINEAR_A,
            LATIN_EXTENDED_F,
            null,
            CYPRIOT_SYLLABARY,
            IMPERIAL_ARAMAIC,
            PALMYRENE,
            NABATAEAN,
            null,
            HATRAN,
            PHOENICIAN,
            LYDIAN,
            null,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            OLD_NORTH_ARABIAN,
            null,
            MANICHAEAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            PSALTER_PAHLAVI,
            null,
            OLD_TURKIC,
            null,
            OLD_HUNGARIAN,
            HANIFI_ROHINGYA,
            GARAY,
            null,
            RUMI_NUMERAL_SYMBOLS,
            YEZIDI,
            ARABIC_EXTENDED_C,
            OLD_SOGDIAN,
            SOGDIAN,
            OLD_UYGHUR,
            CHORASMIAN,
            ELYMAIC,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            MAHAJANI,
            SHARADA,
            SINHALA_ARCHAIC_NUMBERS,
            KHOJKI,
            null,
            MULTANI,
            KHUDAWADI,
            GRANTHA,
            TULU_TIGALARI,
            NEWA,
            TIRHUTA,
            null,
            SIDDHAM,
            MODI,
            MONGOLIAN_SUPPLEMENT,
            TAKRI,
            MYANMAR_EXTENDED_C,
            AHOM,
            null,
            DOGRA,
            null,
            WARANG_CITI,
            DIVES_AKURU,
            null,
            NANDINAGARI,
            ZANABAZAR_SQUARE,
            SOYOMBO,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A,
            PAU_CIN_HAU,
            DEVANAGARI_EXTENDED_A,
            null,
            SUNUWAR,
            BHAIKSUKI,
            MARCHEN,
            null,
            MASARAM_GONDI,
            GUNJALA_GONDI,
            null,
            MAKASAR,
            KAWI,
            null,
            LISU_SUPPLEMENT,
            TAMIL_SUPPLEMENT,
            CUNEIFORM,
            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
            EARLY_DYNASTIC_CUNEIFORM,
            null,
            CYPRO_MINOAN,
            EGYPTIAN_HIEROGLYPHS,
            EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS,
            EGYPTIAN_HIEROGLYPHS_EXTENDED_A,
            ANATOLIAN_HIEROGLYPHS,
            null,
            GURUNG_KHEMA,
            null,
            BAMUM_SUPPLEMENT,
            MRO,
            TANGSA,
            BASSA_VAH,
            PAHAWH_HMONG,
            null,
            KIRAT_RAI,
            null,
            MEDEFAIDRIN,
            null,
            MIAO,
            null,
            IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
            TANGUT,
            TANGUT_COMPONENTS,
            KHITAN_SMALL_SCRIPT,
            TANGUT_SUPPLEMENT,
            null,
            KANA_EXTENDED_B,
            KANA_SUPPLEMENT,
            KANA_EXTENDED_A,
            SMALL_KANA_EXTENSION,
            NUSHU,
            null,
            DUPLOYAN,
            SHORTHAND_FORMAT_CONTROLS,
            null,
            SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT,
            null,
            ZNAMENNY_MUSICAL_NOTATION,
            null,
            BYZANTINE_MUSICAL_SYMBOLS,
            MUSICAL_SYMBOLS,
            ANCIENT_GREEK_MUSICAL_NOTATION,
            null,
            KAKTOVIK_NUMERALS,
            MAYAN_NUMERALS,
            TAI_XUAN_JING_SYMBOLS,
            COUNTING_ROD_NUMERALS,
            null,
            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
            SUTTON_SIGNWRITING,
            null,
            LATIN_EXTENDED_G,
            GLAGOLITIC_SUPPLEMENT,
            CYRILLIC_EXTENDED_D,
            null,
            NYIAKENG_PUACHUE_HMONG,
            null,
            TOTO,
            WANCHO,
            null,
            NAG_MUNDARI,
            null,
            OL_ONAL,
            null,
            ETHIOPIC_EXTENDED_B,
            MENDE_KIKAKUI,
            null,
            ADLAM,
            null,
            INDIC_SIYAQ_NUMBERS,
            null,
            OTTOMAN_SIYAQ_NUMBERS,
            null,
            ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
            null,
            MAHJONG_TILES,
            DOMINO_TILES,
            PLAYING_CARDS,
            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
            EMOTICONS,
            ORNAMENTAL_DINGBATS,
            TRANSPORT_AND_MAP_SYMBOLS,
            ALCHEMICAL_SYMBOLS,
            GEOMETRIC_SHAPES_EXTENDED,
            SUPPLEMENTAL_ARROWS_C,
            SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
            CHESS_SYMBOLS,
            SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A,
            SYMBOLS_FOR_LEGACY_COMPUTING,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I,
            null,
            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H,
            null,
            TAGS,
            null,
            VARIATION_SELECTORS_SUPPLEMENT,
            null,
            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
            SUPPLEMENTARY_PRIVATE_USE_AREA_B,
        };


        /**
         * Returns the object representing the Unicode block containing the
         * given character, or {@code null} if the character is not a
         * member of a defined block.
         *
         * <p><b>Note:</b> This method cannot handle
         * <a href="Character.html#supplementary"> supplementary
         * characters</a>.
To support all Unicode characters, including 4511 * supplementary characters, use the {@link #of(int)} method. 4512 * 4513 * @param c The character in question 4514 * @return The {@code UnicodeBlock} instance representing the 4515 * Unicode block of which this character is a member, or 4516 * {@code null} if the character is not a member of any 4517 * Unicode block 4518 */ 4519 public static UnicodeBlock of(char c) { 4520 return of((int)c); 4521 } 4522 4523 /** 4524 * Returns the object representing the Unicode block 4525 * containing the given character (Unicode code point), or 4526 * {@code null} if the character is not a member of a 4527 * defined block. 4528 * 4529 * @param codePoint the character (Unicode code point) in question. 4530 * @return The {@code UnicodeBlock} instance representing the 4531 * Unicode block of which this character is a member, or 4532 * {@code null} if the character is not a member of any 4533 * Unicode block 4534 * @throws IllegalArgumentException if the specified 4535 * {@code codePoint} is an invalid Unicode code point. 4536 * @see Character#isValidCodePoint(int) 4537 * @since 1.5 4538 */ 4539 public static UnicodeBlock of(int codePoint) { 4540 if (!isValidCodePoint(codePoint)) { 4541 throw new IllegalArgumentException( 4542 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 4543 } 4544 4545 int top, bottom, current; 4546 bottom = 0; 4547 top = blockStarts.length; 4548 current = top/2; 4549 4550 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 4551 while (top - bottom > 1) { 4552 if (codePoint >= blockStarts[current]) { 4553 bottom = current; 4554 } else { 4555 top = current; 4556 } 4557 current = (top + bottom) / 2; 4558 } 4559 return blocks[current]; 4560 } 4561 4562 /** 4563 * Returns the UnicodeBlock with the given name. Block 4564 * names are determined by The Unicode Standard. The file 4565 * {@code Blocks.txt} defines blocks for a particular 4566 * version of the standard. 
The {@link Character} class specifies 4567 * the version of the standard that it supports. 4568 * <p> 4569 * This method accepts block names in the following forms: 4570 * <ol> 4571 * <li> Canonical block names as defined by the Unicode Standard. 4572 * For example, the standard defines a "Basic Latin" block. Therefore, this 4573 * method accepts "Basic Latin" as a valid block name. The documentation of 4574 * each UnicodeBlock provides the canonical name. 4575 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 4576 * is a valid block name for the "Basic Latin" block. 4577 * <li>The text representation of each constant UnicodeBlock identifier. 4578 * For example, this method will return the {@link #BASIC_LATIN} block if 4579 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 4580 * hyphens in the canonical name with underscores. 4581 * </ol> 4582 * Finally, character case is ignored for all of the valid block name forms. 4583 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 4584 * The en_US locale's case mapping rules are used to provide case-insensitive 4585 * string comparisons for block name validation. 4586 * <p> 4587 * If the Unicode Standard changes block names, both the previous and 4588 * current names will be accepted. 4589 * 4590 * @param blockName A {@code UnicodeBlock} name. 
4591 * @return The {@code UnicodeBlock} instance identified 4592 * by {@code blockName} 4593 * @throws IllegalArgumentException if {@code blockName} is an 4594 * invalid name 4595 * @throws NullPointerException if {@code blockName} is null 4596 * @since 1.5 4597 */ 4598 public static final UnicodeBlock forName(String blockName) { 4599 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 4600 if (block == null) { 4601 throw new IllegalArgumentException("Not a valid block name: " 4602 + blockName); 4603 } 4604 return block; 4605 } 4606 } 4607 4608 4609 /** 4610 * A family of character subsets representing the character scripts 4611 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 4612 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 4613 * character is assigned to a single Unicode script, either a specific 4614 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 4615 * one of the following three special values, 4616 * {@link Character.UnicodeScript#INHERITED Inherited}, 4617 * {@link Character.UnicodeScript#COMMON Common} or 4618 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 4619 * 4620 * @spec https://www.unicode.org/reports/tr24 Unicode Script Property 4621 * @since 1.7 4622 */ 4623 public static enum UnicodeScript { 4624 4625 /** 4626 * Unicode script "Common". 4627 */ 4628 COMMON, 4629 4630 /** 4631 * Unicode script "Latin". 4632 */ 4633 LATIN, 4634 4635 /** 4636 * Unicode script "Greek". 4637 */ 4638 GREEK, 4639 4640 /** 4641 * Unicode script "Cyrillic". 4642 */ 4643 CYRILLIC, 4644 4645 /** 4646 * Unicode script "Armenian". 4647 */ 4648 ARMENIAN, 4649 4650 /** 4651 * Unicode script "Hebrew". 4652 */ 4653 HEBREW, 4654 4655 /** 4656 * Unicode script "Arabic". 4657 */ 4658 ARABIC, 4659 4660 /** 4661 * Unicode script "Syriac". 4662 */ 4663 SYRIAC, 4664 4665 /** 4666 * Unicode script "Thaana". 4667 */ 4668 THAANA, 4669 4670 /** 4671 * Unicode script "Devanagari". 
4672 */ 4673 DEVANAGARI, 4674 4675 /** 4676 * Unicode script "Bengali". 4677 */ 4678 BENGALI, 4679 4680 /** 4681 * Unicode script "Gurmukhi". 4682 */ 4683 GURMUKHI, 4684 4685 /** 4686 * Unicode script "Gujarati". 4687 */ 4688 GUJARATI, 4689 4690 /** 4691 * Unicode script "Oriya". 4692 */ 4693 ORIYA, 4694 4695 /** 4696 * Unicode script "Tamil". 4697 */ 4698 TAMIL, 4699 4700 /** 4701 * Unicode script "Telugu". 4702 */ 4703 TELUGU, 4704 4705 /** 4706 * Unicode script "Kannada". 4707 */ 4708 KANNADA, 4709 4710 /** 4711 * Unicode script "Malayalam". 4712 */ 4713 MALAYALAM, 4714 4715 /** 4716 * Unicode script "Sinhala". 4717 */ 4718 SINHALA, 4719 4720 /** 4721 * Unicode script "Thai". 4722 */ 4723 THAI, 4724 4725 /** 4726 * Unicode script "Lao". 4727 */ 4728 LAO, 4729 4730 /** 4731 * Unicode script "Tibetan". 4732 */ 4733 TIBETAN, 4734 4735 /** 4736 * Unicode script "Myanmar". 4737 */ 4738 MYANMAR, 4739 4740 /** 4741 * Unicode script "Georgian". 4742 */ 4743 GEORGIAN, 4744 4745 /** 4746 * Unicode script "Hangul". 4747 */ 4748 HANGUL, 4749 4750 /** 4751 * Unicode script "Ethiopic". 4752 */ 4753 ETHIOPIC, 4754 4755 /** 4756 * Unicode script "Cherokee". 4757 */ 4758 CHEROKEE, 4759 4760 /** 4761 * Unicode script "Canadian_Aboriginal". 4762 */ 4763 CANADIAN_ABORIGINAL, 4764 4765 /** 4766 * Unicode script "Ogham". 4767 */ 4768 OGHAM, 4769 4770 /** 4771 * Unicode script "Runic". 4772 */ 4773 RUNIC, 4774 4775 /** 4776 * Unicode script "Khmer". 4777 */ 4778 KHMER, 4779 4780 /** 4781 * Unicode script "Mongolian". 4782 */ 4783 MONGOLIAN, 4784 4785 /** 4786 * Unicode script "Hiragana". 4787 */ 4788 HIRAGANA, 4789 4790 /** 4791 * Unicode script "Katakana". 4792 */ 4793 KATAKANA, 4794 4795 /** 4796 * Unicode script "Bopomofo". 4797 */ 4798 BOPOMOFO, 4799 4800 /** 4801 * Unicode script "Han". 4802 */ 4803 HAN, 4804 4805 /** 4806 * Unicode script "Yi". 4807 */ 4808 YI, 4809 4810 /** 4811 * Unicode script "Old_Italic". 
4812 */ 4813 OLD_ITALIC, 4814 4815 /** 4816 * Unicode script "Gothic". 4817 */ 4818 GOTHIC, 4819 4820 /** 4821 * Unicode script "Deseret". 4822 */ 4823 DESERET, 4824 4825 /** 4826 * Unicode script "Inherited". 4827 */ 4828 INHERITED, 4829 4830 /** 4831 * Unicode script "Tagalog". 4832 */ 4833 TAGALOG, 4834 4835 /** 4836 * Unicode script "Hanunoo". 4837 */ 4838 HANUNOO, 4839 4840 /** 4841 * Unicode script "Buhid". 4842 */ 4843 BUHID, 4844 4845 /** 4846 * Unicode script "Tagbanwa". 4847 */ 4848 TAGBANWA, 4849 4850 /** 4851 * Unicode script "Limbu". 4852 */ 4853 LIMBU, 4854 4855 /** 4856 * Unicode script "Tai_Le". 4857 */ 4858 TAI_LE, 4859 4860 /** 4861 * Unicode script "Linear_B". 4862 */ 4863 LINEAR_B, 4864 4865 /** 4866 * Unicode script "Ugaritic". 4867 */ 4868 UGARITIC, 4869 4870 /** 4871 * Unicode script "Shavian". 4872 */ 4873 SHAVIAN, 4874 4875 /** 4876 * Unicode script "Osmanya". 4877 */ 4878 OSMANYA, 4879 4880 /** 4881 * Unicode script "Cypriot". 4882 */ 4883 CYPRIOT, 4884 4885 /** 4886 * Unicode script "Braille". 4887 */ 4888 BRAILLE, 4889 4890 /** 4891 * Unicode script "Buginese". 4892 */ 4893 BUGINESE, 4894 4895 /** 4896 * Unicode script "Coptic". 4897 */ 4898 COPTIC, 4899 4900 /** 4901 * Unicode script "New_Tai_Lue". 4902 */ 4903 NEW_TAI_LUE, 4904 4905 /** 4906 * Unicode script "Glagolitic". 4907 */ 4908 GLAGOLITIC, 4909 4910 /** 4911 * Unicode script "Tifinagh". 4912 */ 4913 TIFINAGH, 4914 4915 /** 4916 * Unicode script "Syloti_Nagri". 4917 */ 4918 SYLOTI_NAGRI, 4919 4920 /** 4921 * Unicode script "Old_Persian". 4922 */ 4923 OLD_PERSIAN, 4924 4925 /** 4926 * Unicode script "Kharoshthi". 4927 */ 4928 KHAROSHTHI, 4929 4930 /** 4931 * Unicode script "Balinese". 4932 */ 4933 BALINESE, 4934 4935 /** 4936 * Unicode script "Cuneiform". 4937 */ 4938 CUNEIFORM, 4939 4940 /** 4941 * Unicode script "Phoenician". 4942 */ 4943 PHOENICIAN, 4944 4945 /** 4946 * Unicode script "Phags_Pa". 4947 */ 4948 PHAGS_PA, 4949 4950 /** 4951 * Unicode script "Nko". 
4952 */ 4953 NKO, 4954 4955 /** 4956 * Unicode script "Sundanese". 4957 */ 4958 SUNDANESE, 4959 4960 /** 4961 * Unicode script "Batak". 4962 */ 4963 BATAK, 4964 4965 /** 4966 * Unicode script "Lepcha". 4967 */ 4968 LEPCHA, 4969 4970 /** 4971 * Unicode script "Ol_Chiki". 4972 */ 4973 OL_CHIKI, 4974 4975 /** 4976 * Unicode script "Vai". 4977 */ 4978 VAI, 4979 4980 /** 4981 * Unicode script "Saurashtra". 4982 */ 4983 SAURASHTRA, 4984 4985 /** 4986 * Unicode script "Kayah_Li". 4987 */ 4988 KAYAH_LI, 4989 4990 /** 4991 * Unicode script "Rejang". 4992 */ 4993 REJANG, 4994 4995 /** 4996 * Unicode script "Lycian". 4997 */ 4998 LYCIAN, 4999 5000 /** 5001 * Unicode script "Carian". 5002 */ 5003 CARIAN, 5004 5005 /** 5006 * Unicode script "Lydian". 5007 */ 5008 LYDIAN, 5009 5010 /** 5011 * Unicode script "Cham". 5012 */ 5013 CHAM, 5014 5015 /** 5016 * Unicode script "Tai_Tham". 5017 */ 5018 TAI_THAM, 5019 5020 /** 5021 * Unicode script "Tai_Viet". 5022 */ 5023 TAI_VIET, 5024 5025 /** 5026 * Unicode script "Avestan". 5027 */ 5028 AVESTAN, 5029 5030 /** 5031 * Unicode script "Egyptian_Hieroglyphs". 5032 */ 5033 EGYPTIAN_HIEROGLYPHS, 5034 5035 /** 5036 * Unicode script "Samaritan". 5037 */ 5038 SAMARITAN, 5039 5040 /** 5041 * Unicode script "Mandaic". 5042 */ 5043 MANDAIC, 5044 5045 /** 5046 * Unicode script "Lisu". 5047 */ 5048 LISU, 5049 5050 /** 5051 * Unicode script "Bamum". 5052 */ 5053 BAMUM, 5054 5055 /** 5056 * Unicode script "Javanese". 5057 */ 5058 JAVANESE, 5059 5060 /** 5061 * Unicode script "Meetei_Mayek". 5062 */ 5063 MEETEI_MAYEK, 5064 5065 /** 5066 * Unicode script "Imperial_Aramaic". 5067 */ 5068 IMPERIAL_ARAMAIC, 5069 5070 /** 5071 * Unicode script "Old_South_Arabian". 5072 */ 5073 OLD_SOUTH_ARABIAN, 5074 5075 /** 5076 * Unicode script "Inscriptional_Parthian". 5077 */ 5078 INSCRIPTIONAL_PARTHIAN, 5079 5080 /** 5081 * Unicode script "Inscriptional_Pahlavi". 5082 */ 5083 INSCRIPTIONAL_PAHLAVI, 5084 5085 /** 5086 * Unicode script "Old_Turkic". 
5087 */ 5088 OLD_TURKIC, 5089 5090 /** 5091 * Unicode script "Brahmi". 5092 */ 5093 BRAHMI, 5094 5095 /** 5096 * Unicode script "Kaithi". 5097 */ 5098 KAITHI, 5099 5100 /** 5101 * Unicode script "Meroitic Hieroglyphs". 5102 * @since 1.8 5103 */ 5104 MEROITIC_HIEROGLYPHS, 5105 5106 /** 5107 * Unicode script "Meroitic Cursive". 5108 * @since 1.8 5109 */ 5110 MEROITIC_CURSIVE, 5111 5112 /** 5113 * Unicode script "Sora Sompeng". 5114 * @since 1.8 5115 */ 5116 SORA_SOMPENG, 5117 5118 /** 5119 * Unicode script "Chakma". 5120 * @since 1.8 5121 */ 5122 CHAKMA, 5123 5124 /** 5125 * Unicode script "Sharada". 5126 * @since 1.8 5127 */ 5128 SHARADA, 5129 5130 /** 5131 * Unicode script "Takri". 5132 * @since 1.8 5133 */ 5134 TAKRI, 5135 5136 /** 5137 * Unicode script "Miao". 5138 * @since 1.8 5139 */ 5140 MIAO, 5141 5142 /** 5143 * Unicode script "Caucasian Albanian". 5144 * @since 9 5145 */ 5146 CAUCASIAN_ALBANIAN, 5147 5148 /** 5149 * Unicode script "Bassa Vah". 5150 * @since 9 5151 */ 5152 BASSA_VAH, 5153 5154 /** 5155 * Unicode script "Duployan". 5156 * @since 9 5157 */ 5158 DUPLOYAN, 5159 5160 /** 5161 * Unicode script "Elbasan". 5162 * @since 9 5163 */ 5164 ELBASAN, 5165 5166 /** 5167 * Unicode script "Grantha". 5168 * @since 9 5169 */ 5170 GRANTHA, 5171 5172 /** 5173 * Unicode script "Pahawh Hmong". 5174 * @since 9 5175 */ 5176 PAHAWH_HMONG, 5177 5178 /** 5179 * Unicode script "Khojki". 5180 * @since 9 5181 */ 5182 KHOJKI, 5183 5184 /** 5185 * Unicode script "Linear A". 5186 * @since 9 5187 */ 5188 LINEAR_A, 5189 5190 /** 5191 * Unicode script "Mahajani". 5192 * @since 9 5193 */ 5194 MAHAJANI, 5195 5196 /** 5197 * Unicode script "Manichaean". 5198 * @since 9 5199 */ 5200 MANICHAEAN, 5201 5202 /** 5203 * Unicode script "Mende Kikakui". 5204 * @since 9 5205 */ 5206 MENDE_KIKAKUI, 5207 5208 /** 5209 * Unicode script "Modi". 5210 * @since 9 5211 */ 5212 MODI, 5213 5214 /** 5215 * Unicode script "Mro". 
5216 * @since 9 5217 */ 5218 MRO, 5219 5220 /** 5221 * Unicode script "Old North Arabian". 5222 * @since 9 5223 */ 5224 OLD_NORTH_ARABIAN, 5225 5226 /** 5227 * Unicode script "Nabataean". 5228 * @since 9 5229 */ 5230 NABATAEAN, 5231 5232 /** 5233 * Unicode script "Palmyrene". 5234 * @since 9 5235 */ 5236 PALMYRENE, 5237 5238 /** 5239 * Unicode script "Pau Cin Hau". 5240 * @since 9 5241 */ 5242 PAU_CIN_HAU, 5243 5244 /** 5245 * Unicode script "Old Permic". 5246 * @since 9 5247 */ 5248 OLD_PERMIC, 5249 5250 /** 5251 * Unicode script "Psalter Pahlavi". 5252 * @since 9 5253 */ 5254 PSALTER_PAHLAVI, 5255 5256 /** 5257 * Unicode script "Siddham". 5258 * @since 9 5259 */ 5260 SIDDHAM, 5261 5262 /** 5263 * Unicode script "Khudawadi". 5264 * @since 9 5265 */ 5266 KHUDAWADI, 5267 5268 /** 5269 * Unicode script "Tirhuta". 5270 * @since 9 5271 */ 5272 TIRHUTA, 5273 5274 /** 5275 * Unicode script "Warang Citi". 5276 * @since 9 5277 */ 5278 WARANG_CITI, 5279 5280 /** 5281 * Unicode script "Ahom". 5282 * @since 9 5283 */ 5284 AHOM, 5285 5286 /** 5287 * Unicode script "Anatolian Hieroglyphs". 5288 * @since 9 5289 */ 5290 ANATOLIAN_HIEROGLYPHS, 5291 5292 /** 5293 * Unicode script "Hatran". 5294 * @since 9 5295 */ 5296 HATRAN, 5297 5298 /** 5299 * Unicode script "Multani". 5300 * @since 9 5301 */ 5302 MULTANI, 5303 5304 /** 5305 * Unicode script "Old Hungarian". 5306 * @since 9 5307 */ 5308 OLD_HUNGARIAN, 5309 5310 /** 5311 * Unicode script "SignWriting". 5312 * @since 9 5313 */ 5314 SIGNWRITING, 5315 5316 /** 5317 * Unicode script "Adlam". 5318 * @since 11 5319 */ 5320 ADLAM, 5321 5322 /** 5323 * Unicode script "Bhaiksuki". 5324 * @since 11 5325 */ 5326 BHAIKSUKI, 5327 5328 /** 5329 * Unicode script "Marchen". 5330 * @since 11 5331 */ 5332 MARCHEN, 5333 5334 /** 5335 * Unicode script "Newa". 5336 * @since 11 5337 */ 5338 NEWA, 5339 5340 /** 5341 * Unicode script "Osage". 5342 * @since 11 5343 */ 5344 OSAGE, 5345 5346 /** 5347 * Unicode script "Tangut". 
5348 * @since 11 5349 */ 5350 TANGUT, 5351 5352 /** 5353 * Unicode script "Masaram Gondi". 5354 * @since 11 5355 */ 5356 MASARAM_GONDI, 5357 5358 /** 5359 * Unicode script "Nushu". 5360 * @since 11 5361 */ 5362 NUSHU, 5363 5364 /** 5365 * Unicode script "Soyombo". 5366 * @since 11 5367 */ 5368 SOYOMBO, 5369 5370 /** 5371 * Unicode script "Zanabazar Square". 5372 * @since 11 5373 */ 5374 ZANABAZAR_SQUARE, 5375 5376 /** 5377 * Unicode script "Hanifi Rohingya". 5378 * @since 12 5379 */ 5380 HANIFI_ROHINGYA, 5381 5382 /** 5383 * Unicode script "Old Sogdian". 5384 * @since 12 5385 */ 5386 OLD_SOGDIAN, 5387 5388 /** 5389 * Unicode script "Sogdian". 5390 * @since 12 5391 */ 5392 SOGDIAN, 5393 5394 /** 5395 * Unicode script "Dogra". 5396 * @since 12 5397 */ 5398 DOGRA, 5399 5400 /** 5401 * Unicode script "Gunjala Gondi". 5402 * @since 12 5403 */ 5404 GUNJALA_GONDI, 5405 5406 /** 5407 * Unicode script "Makasar". 5408 * @since 12 5409 */ 5410 MAKASAR, 5411 5412 /** 5413 * Unicode script "Medefaidrin". 5414 * @since 12 5415 */ 5416 MEDEFAIDRIN, 5417 5418 /** 5419 * Unicode script "Elymaic". 5420 * @since 13 5421 */ 5422 ELYMAIC, 5423 5424 /** 5425 * Unicode script "Nandinagari". 5426 * @since 13 5427 */ 5428 NANDINAGARI, 5429 5430 /** 5431 * Unicode script "Nyiakeng Puachue Hmong". 5432 * @since 13 5433 */ 5434 NYIAKENG_PUACHUE_HMONG, 5435 5436 /** 5437 * Unicode script "Wancho". 5438 * @since 13 5439 */ 5440 WANCHO, 5441 5442 /** 5443 * Unicode script "Yezidi". 5444 * @since 15 5445 */ 5446 YEZIDI, 5447 5448 /** 5449 * Unicode script "Chorasmian". 5450 * @since 15 5451 */ 5452 CHORASMIAN, 5453 5454 /** 5455 * Unicode script "Dives Akuru". 5456 * @since 15 5457 */ 5458 DIVES_AKURU, 5459 5460 /** 5461 * Unicode script "Khitan Small Script". 5462 * @since 15 5463 */ 5464 KHITAN_SMALL_SCRIPT, 5465 5466 /** 5467 * Unicode script "Vithkuqi". 5468 * @since 19 5469 */ 5470 VITHKUQI, 5471 5472 /** 5473 * Unicode script "Old Uyghur". 
5474 * @since 19 5475 */ 5476 OLD_UYGHUR, 5477 5478 /** 5479 * Unicode script "Cypro Minoan". 5480 * @since 19 5481 */ 5482 CYPRO_MINOAN, 5483 5484 /** 5485 * Unicode script "Tangsa". 5486 * @since 19 5487 */ 5488 TANGSA, 5489 5490 /** 5491 * Unicode script "Toto". 5492 * @since 19 5493 */ 5494 TOTO, 5495 5496 /** 5497 * Unicode script "Kawi". 5498 * @since 20 5499 */ 5500 KAWI, 5501 5502 /** 5503 * Unicode script "Nag Mundari". 5504 * @since 20 5505 */ 5506 NAG_MUNDARI, 5507 5508 /** 5509 * Unicode script "Todhri". 5510 * @since 24 5511 */ 5512 TODHRI, 5513 5514 /** 5515 * Unicode script "Garay". 5516 * @since 24 5517 */ 5518 GARAY, 5519 5520 /** 5521 * Unicode script "Tulu Tigalari". 5522 * @since 24 5523 */ 5524 TULU_TIGALARI, 5525 5526 /** 5527 * Unicode script "Sunuwar". 5528 * @since 24 5529 */ 5530 SUNUWAR, 5531 5532 /** 5533 * Unicode script "Gurung Khema". 5534 * @since 24 5535 */ 5536 GURUNG_KHEMA, 5537 5538 /** 5539 * Unicode script "Kirat Rai". 5540 * @since 24 5541 */ 5542 KIRAT_RAI, 5543 5544 /** 5545 * Unicode script "Ol Onal". 5546 * @since 24 5547 */ 5548 OL_ONAL, 5549 5550 /** 5551 * Unicode script "Unknown". 5552 */ 5553 UNKNOWN; // must be the last enum constant for calculating the size of "aliases" hash map. 
5554 5555 private static final int[] scriptStarts = { 5556 0x0000, // 0000..0040; COMMON 5557 0x0041, // 0041..005A; LATIN 5558 0x005B, // 005B..0060; COMMON 5559 0x0061, // 0061..007A; LATIN 5560 0x007B, // 007B..00A9; COMMON 5561 0x00AA, // 00AA ; LATIN 5562 0x00AB, // 00AB..00B9; COMMON 5563 0x00BA, // 00BA ; LATIN 5564 0x00BB, // 00BB..00BF; COMMON 5565 0x00C0, // 00C0..00D6; LATIN 5566 0x00D7, // 00D7 ; COMMON 5567 0x00D8, // 00D8..00F6; LATIN 5568 0x00F7, // 00F7 ; COMMON 5569 0x00F8, // 00F8..02B8; LATIN 5570 0x02B9, // 02B9..02DF; COMMON 5571 0x02E0, // 02E0..02E4; LATIN 5572 0x02E5, // 02E5..02E9; COMMON 5573 0x02EA, // 02EA..02EB; BOPOMOFO 5574 0x02EC, // 02EC..02FF; COMMON 5575 0x0300, // 0300..036F; INHERITED 5576 0x0370, // 0370..0373; GREEK 5577 0x0374, // 0374 ; COMMON 5578 0x0375, // 0375..0377; GREEK 5579 0x0378, // 0378..0379; UNKNOWN 5580 0x037A, // 037A..037D; GREEK 5581 0x037E, // 037E ; COMMON 5582 0x037F, // 037F ; GREEK 5583 0x0380, // 0380..0383; UNKNOWN 5584 0x0384, // 0384 ; GREEK 5585 0x0385, // 0385 ; COMMON 5586 0x0386, // 0386 ; GREEK 5587 0x0387, // 0387 ; COMMON 5588 0x0388, // 0388..038A; GREEK 5589 0x038B, // 038B ; UNKNOWN 5590 0x038C, // 038C ; GREEK 5591 0x038D, // 038D ; UNKNOWN 5592 0x038E, // 038E..03A1; GREEK 5593 0x03A2, // 03A2 ; UNKNOWN 5594 0x03A3, // 03A3..03E1; GREEK 5595 0x03E2, // 03E2..03EF; COPTIC 5596 0x03F0, // 03F0..03FF; GREEK 5597 0x0400, // 0400..0484; CYRILLIC 5598 0x0485, // 0485..0486; INHERITED 5599 0x0487, // 0487..052F; CYRILLIC 5600 0x0530, // 0530 ; UNKNOWN 5601 0x0531, // 0531..0556; ARMENIAN 5602 0x0557, // 0557..0558; UNKNOWN 5603 0x0559, // 0559..058A; ARMENIAN 5604 0x058B, // 058B..058C; UNKNOWN 5605 0x058D, // 058D..058F; ARMENIAN 5606 0x0590, // 0590 ; UNKNOWN 5607 0x0591, // 0591..05C7; HEBREW 5608 0x05C8, // 05C8..05CF; UNKNOWN 5609 0x05D0, // 05D0..05EA; HEBREW 5610 0x05EB, // 05EB..05EE; UNKNOWN 5611 0x05EF, // 05EF..05F4; HEBREW 5612 0x05F5, // 05F5..05FF; UNKNOWN 5613 0x0600, // 
0600..0604; ARABIC 5614 0x0605, // 0605 ; COMMON 5615 0x0606, // 0606..060B; ARABIC 5616 0x060C, // 060C ; COMMON 5617 0x060D, // 060D..061A; ARABIC 5618 0x061B, // 061B ; COMMON 5619 0x061C, // 061C..061E; ARABIC 5620 0x061F, // 061F ; COMMON 5621 0x0620, // 0620..063F; ARABIC 5622 0x0640, // 0640 ; COMMON 5623 0x0641, // 0641..064A; ARABIC 5624 0x064B, // 064B..0655; INHERITED 5625 0x0656, // 0656..066F; ARABIC 5626 0x0670, // 0670 ; INHERITED 5627 0x0671, // 0671..06DC; ARABIC 5628 0x06DD, // 06DD ; COMMON 5629 0x06DE, // 06DE..06FF; ARABIC 5630 0x0700, // 0700..070D; SYRIAC 5631 0x070E, // 070E ; UNKNOWN 5632 0x070F, // 070F..074A; SYRIAC 5633 0x074B, // 074B..074C; UNKNOWN 5634 0x074D, // 074D..074F; SYRIAC 5635 0x0750, // 0750..077F; ARABIC 5636 0x0780, // 0780..07B1; THAANA 5637 0x07B2, // 07B2..07BF; UNKNOWN 5638 0x07C0, // 07C0..07FA; NKO 5639 0x07FB, // 07FB..07FC; UNKNOWN 5640 0x07FD, // 07FD..07FF; NKO 5641 0x0800, // 0800..082D; SAMARITAN 5642 0x082E, // 082E..082F; UNKNOWN 5643 0x0830, // 0830..083E; SAMARITAN 5644 0x083F, // 083F ; UNKNOWN 5645 0x0840, // 0840..085B; MANDAIC 5646 0x085C, // 085C..085D; UNKNOWN 5647 0x085E, // 085E ; MANDAIC 5648 0x085F, // 085F ; UNKNOWN 5649 0x0860, // 0860..086A; SYRIAC 5650 0x086B, // 086B..086F; UNKNOWN 5651 0x0870, // 0870..088E; ARABIC 5652 0x088F, // 088F ; UNKNOWN 5653 0x0890, // 0890..0891; ARABIC 5654 0x0892, // 0892..0896; UNKNOWN 5655 0x0897, // 0897..08E1; ARABIC 5656 0x08E2, // 08E2 ; COMMON 5657 0x08E3, // 08E3..08FF; ARABIC 5658 0x0900, // 0900..0950; DEVANAGARI 5659 0x0951, // 0951..0954; INHERITED 5660 0x0955, // 0955..0963; DEVANAGARI 5661 0x0964, // 0964..0965; COMMON 5662 0x0966, // 0966..097F; DEVANAGARI 5663 0x0980, // 0980..0983; BENGALI 5664 0x0984, // 0984 ; UNKNOWN 5665 0x0985, // 0985..098C; BENGALI 5666 0x098D, // 098D..098E; UNKNOWN 5667 0x098F, // 098F..0990; BENGALI 5668 0x0991, // 0991..0992; UNKNOWN 5669 0x0993, // 0993..09A8; BENGALI 5670 0x09A9, // 09A9 ; UNKNOWN 5671 0x09AA, // 
09AA..09B0; BENGALI 5672 0x09B1, // 09B1 ; UNKNOWN 5673 0x09B2, // 09B2 ; BENGALI 5674 0x09B3, // 09B3..09B5; UNKNOWN 5675 0x09B6, // 09B6..09B9; BENGALI 5676 0x09BA, // 09BA..09BB; UNKNOWN 5677 0x09BC, // 09BC..09C4; BENGALI 5678 0x09C5, // 09C5..09C6; UNKNOWN 5679 0x09C7, // 09C7..09C8; BENGALI 5680 0x09C9, // 09C9..09CA; UNKNOWN 5681 0x09CB, // 09CB..09CE; BENGALI 5682 0x09CF, // 09CF..09D6; UNKNOWN 5683 0x09D7, // 09D7 ; BENGALI 5684 0x09D8, // 09D8..09DB; UNKNOWN 5685 0x09DC, // 09DC..09DD; BENGALI 5686 0x09DE, // 09DE ; UNKNOWN 5687 0x09DF, // 09DF..09E3; BENGALI 5688 0x09E4, // 09E4..09E5; UNKNOWN 5689 0x09E6, // 09E6..09FE; BENGALI 5690 0x09FF, // 09FF..0A00; UNKNOWN 5691 0x0A01, // 0A01..0A03; GURMUKHI 5692 0x0A04, // 0A04 ; UNKNOWN 5693 0x0A05, // 0A05..0A0A; GURMUKHI 5694 0x0A0B, // 0A0B..0A0E; UNKNOWN 5695 0x0A0F, // 0A0F..0A10; GURMUKHI 5696 0x0A11, // 0A11..0A12; UNKNOWN 5697 0x0A13, // 0A13..0A28; GURMUKHI 5698 0x0A29, // 0A29 ; UNKNOWN 5699 0x0A2A, // 0A2A..0A30; GURMUKHI 5700 0x0A31, // 0A31 ; UNKNOWN 5701 0x0A32, // 0A32..0A33; GURMUKHI 5702 0x0A34, // 0A34 ; UNKNOWN 5703 0x0A35, // 0A35..0A36; GURMUKHI 5704 0x0A37, // 0A37 ; UNKNOWN 5705 0x0A38, // 0A38..0A39; GURMUKHI 5706 0x0A3A, // 0A3A..0A3B; UNKNOWN 5707 0x0A3C, // 0A3C ; GURMUKHI 5708 0x0A3D, // 0A3D ; UNKNOWN 5709 0x0A3E, // 0A3E..0A42; GURMUKHI 5710 0x0A43, // 0A43..0A46; UNKNOWN 5711 0x0A47, // 0A47..0A48; GURMUKHI 5712 0x0A49, // 0A49..0A4A; UNKNOWN 5713 0x0A4B, // 0A4B..0A4D; GURMUKHI 5714 0x0A4E, // 0A4E..0A50; UNKNOWN 5715 0x0A51, // 0A51 ; GURMUKHI 5716 0x0A52, // 0A52..0A58; UNKNOWN 5717 0x0A59, // 0A59..0A5C; GURMUKHI 5718 0x0A5D, // 0A5D ; UNKNOWN 5719 0x0A5E, // 0A5E ; GURMUKHI 5720 0x0A5F, // 0A5F..0A65; UNKNOWN 5721 0x0A66, // 0A66..0A76; GURMUKHI 5722 0x0A77, // 0A77..0A80; UNKNOWN 5723 0x0A81, // 0A81..0A83; GUJARATI 5724 0x0A84, // 0A84 ; UNKNOWN 5725 0x0A85, // 0A85..0A8D; GUJARATI 5726 0x0A8E, // 0A8E ; UNKNOWN 5727 0x0A8F, // 0A8F..0A91; GUJARATI 5728 0x0A92, // 0A92 ; 
UNKNOWN 5729 0x0A93, // 0A93..0AA8; GUJARATI 5730 0x0AA9, // 0AA9 ; UNKNOWN 5731 0x0AAA, // 0AAA..0AB0; GUJARATI 5732 0x0AB1, // 0AB1 ; UNKNOWN 5733 0x0AB2, // 0AB2..0AB3; GUJARATI 5734 0x0AB4, // 0AB4 ; UNKNOWN 5735 0x0AB5, // 0AB5..0AB9; GUJARATI 5736 0x0ABA, // 0ABA..0ABB; UNKNOWN 5737 0x0ABC, // 0ABC..0AC5; GUJARATI 5738 0x0AC6, // 0AC6 ; UNKNOWN 5739 0x0AC7, // 0AC7..0AC9; GUJARATI 5740 0x0ACA, // 0ACA ; UNKNOWN 5741 0x0ACB, // 0ACB..0ACD; GUJARATI 5742 0x0ACE, // 0ACE..0ACF; UNKNOWN 5743 0x0AD0, // 0AD0 ; GUJARATI 5744 0x0AD1, // 0AD1..0ADF; UNKNOWN 5745 0x0AE0, // 0AE0..0AE3; GUJARATI 5746 0x0AE4, // 0AE4..0AE5; UNKNOWN 5747 0x0AE6, // 0AE6..0AF1; GUJARATI 5748 0x0AF2, // 0AF2..0AF8; UNKNOWN 5749 0x0AF9, // 0AF9..0AFF; GUJARATI 5750 0x0B00, // 0B00 ; UNKNOWN 5751 0x0B01, // 0B01..0B03; ORIYA 5752 0x0B04, // 0B04 ; UNKNOWN 5753 0x0B05, // 0B05..0B0C; ORIYA 5754 0x0B0D, // 0B0D..0B0E; UNKNOWN 5755 0x0B0F, // 0B0F..0B10; ORIYA 5756 0x0B11, // 0B11..0B12; UNKNOWN 5757 0x0B13, // 0B13..0B28; ORIYA 5758 0x0B29, // 0B29 ; UNKNOWN 5759 0x0B2A, // 0B2A..0B30; ORIYA 5760 0x0B31, // 0B31 ; UNKNOWN 5761 0x0B32, // 0B32..0B33; ORIYA 5762 0x0B34, // 0B34 ; UNKNOWN 5763 0x0B35, // 0B35..0B39; ORIYA 5764 0x0B3A, // 0B3A..0B3B; UNKNOWN 5765 0x0B3C, // 0B3C..0B44; ORIYA 5766 0x0B45, // 0B45..0B46; UNKNOWN 5767 0x0B47, // 0B47..0B48; ORIYA 5768 0x0B49, // 0B49..0B4A; UNKNOWN 5769 0x0B4B, // 0B4B..0B4D; ORIYA 5770 0x0B4E, // 0B4E..0B54; UNKNOWN 5771 0x0B55, // 0B55..0B57; ORIYA 5772 0x0B58, // 0B58..0B5B; UNKNOWN 5773 0x0B5C, // 0B5C..0B5D; ORIYA 5774 0x0B5E, // 0B5E ; UNKNOWN 5775 0x0B5F, // 0B5F..0B63; ORIYA 5776 0x0B64, // 0B64..0B65; UNKNOWN 5777 0x0B66, // 0B66..0B77; ORIYA 5778 0x0B78, // 0B78..0B81; UNKNOWN 5779 0x0B82, // 0B82..0B83; TAMIL 5780 0x0B84, // 0B84 ; UNKNOWN 5781 0x0B85, // 0B85..0B8A; TAMIL 5782 0x0B8B, // 0B8B..0B8D; UNKNOWN 5783 0x0B8E, // 0B8E..0B90; TAMIL 5784 0x0B91, // 0B91 ; UNKNOWN 5785 0x0B92, // 0B92..0B95; TAMIL 5786 0x0B96, // 0B96..0B98; 
UNKNOWN 5787 0x0B99, // 0B99..0B9A; TAMIL 5788 0x0B9B, // 0B9B ; UNKNOWN 5789 0x0B9C, // 0B9C ; TAMIL 5790 0x0B9D, // 0B9D ; UNKNOWN 5791 0x0B9E, // 0B9E..0B9F; TAMIL 5792 0x0BA0, // 0BA0..0BA2; UNKNOWN 5793 0x0BA3, // 0BA3..0BA4; TAMIL 5794 0x0BA5, // 0BA5..0BA7; UNKNOWN 5795 0x0BA8, // 0BA8..0BAA; TAMIL 5796 0x0BAB, // 0BAB..0BAD; UNKNOWN 5797 0x0BAE, // 0BAE..0BB9; TAMIL 5798 0x0BBA, // 0BBA..0BBD; UNKNOWN 5799 0x0BBE, // 0BBE..0BC2; TAMIL 5800 0x0BC3, // 0BC3..0BC5; UNKNOWN 5801 0x0BC6, // 0BC6..0BC8; TAMIL 5802 0x0BC9, // 0BC9 ; UNKNOWN 5803 0x0BCA, // 0BCA..0BCD; TAMIL 5804 0x0BCE, // 0BCE..0BCF; UNKNOWN 5805 0x0BD0, // 0BD0 ; TAMIL 5806 0x0BD1, // 0BD1..0BD6; UNKNOWN 5807 0x0BD7, // 0BD7 ; TAMIL 5808 0x0BD8, // 0BD8..0BE5; UNKNOWN 5809 0x0BE6, // 0BE6..0BFA; TAMIL 5810 0x0BFB, // 0BFB..0BFF; UNKNOWN 5811 0x0C00, // 0C00..0C0C; TELUGU 5812 0x0C0D, // 0C0D ; UNKNOWN 5813 0x0C0E, // 0C0E..0C10; TELUGU 5814 0x0C11, // 0C11 ; UNKNOWN 5815 0x0C12, // 0C12..0C28; TELUGU 5816 0x0C29, // 0C29 ; UNKNOWN 5817 0x0C2A, // 0C2A..0C39; TELUGU 5818 0x0C3A, // 0C3A..0C3B; UNKNOWN 5819 0x0C3C, // 0C3C..0C44; TELUGU 5820 0x0C45, // 0C45 ; UNKNOWN 5821 0x0C46, // 0C46..0C48; TELUGU 5822 0x0C49, // 0C49 ; UNKNOWN 5823 0x0C4A, // 0C4A..0C4D; TELUGU 5824 0x0C4E, // 0C4E..0C54; UNKNOWN 5825 0x0C55, // 0C55..0C56; TELUGU 5826 0x0C57, // 0C57 ; UNKNOWN 5827 0x0C58, // 0C58..0C5A; TELUGU 5828 0x0C5B, // 0C5B..0C5C; UNKNOWN 5829 0x0C5D, // 0C5D ; TELUGU 5830 0x0C5E, // 0C5E..0C5F; UNKNOWN 5831 0x0C60, // 0C60..0C63; TELUGU 5832 0x0C64, // 0C64..0C65; UNKNOWN 5833 0x0C66, // 0C66..0C6F; TELUGU 5834 0x0C70, // 0C70..0C76; UNKNOWN 5835 0x0C77, // 0C77..0C7F; TELUGU 5836 0x0C80, // 0C80..0C8C; KANNADA 5837 0x0C8D, // 0C8D ; UNKNOWN 5838 0x0C8E, // 0C8E..0C90; KANNADA 5839 0x0C91, // 0C91 ; UNKNOWN 5840 0x0C92, // 0C92..0CA8; KANNADA 5841 0x0CA9, // 0CA9 ; UNKNOWN 5842 0x0CAA, // 0CAA..0CB3; KANNADA 5843 0x0CB4, // 0CB4 ; UNKNOWN 5844 0x0CB5, // 0CB5..0CB9; KANNADA 5845 0x0CBA, // 
0CBA..0CBB; UNKNOWN 5846 0x0CBC, // 0CBC..0CC4; KANNADA 5847 0x0CC5, // 0CC5 ; UNKNOWN 5848 0x0CC6, // 0CC6..0CC8; KANNADA 5849 0x0CC9, // 0CC9 ; UNKNOWN 5850 0x0CCA, // 0CCA..0CCD; KANNADA 5851 0x0CCE, // 0CCE..0CD4; UNKNOWN 5852 0x0CD5, // 0CD5..0CD6; KANNADA 5853 0x0CD7, // 0CD7..0CDC; UNKNOWN 5854 0x0CDD, // 0CDD..0CDE; KANNADA 5855 0x0CDF, // 0CDF ; UNKNOWN 5856 0x0CE0, // 0CE0..0CE3; KANNADA 5857 0x0CE4, // 0CE4..0CE5; UNKNOWN 5858 0x0CE6, // 0CE6..0CEF; KANNADA 5859 0x0CF0, // 0CF0 ; UNKNOWN 5860 0x0CF1, // 0CF1..0CF3; KANNADA 5861 0x0CF4, // 0CF4..0CFF; UNKNOWN 5862 0x0D00, // 0D00..0D0C; MALAYALAM 5863 0x0D0D, // 0D0D ; UNKNOWN 5864 0x0D0E, // 0D0E..0D10; MALAYALAM 5865 0x0D11, // 0D11 ; UNKNOWN 5866 0x0D12, // 0D12..0D44; MALAYALAM 5867 0x0D45, // 0D45 ; UNKNOWN 5868 0x0D46, // 0D46..0D48; MALAYALAM 5869 0x0D49, // 0D49 ; UNKNOWN 5870 0x0D4A, // 0D4A..0D4F; MALAYALAM 5871 0x0D50, // 0D50..0D53; UNKNOWN 5872 0x0D54, // 0D54..0D63; MALAYALAM 5873 0x0D64, // 0D64..0D65; UNKNOWN 5874 0x0D66, // 0D66..0D7F; MALAYALAM 5875 0x0D80, // 0D80 ; UNKNOWN 5876 0x0D81, // 0D81..0D83; SINHALA 5877 0x0D84, // 0D84 ; UNKNOWN 5878 0x0D85, // 0D85..0D96; SINHALA 5879 0x0D97, // 0D97..0D99; UNKNOWN 5880 0x0D9A, // 0D9A..0DB1; SINHALA 5881 0x0DB2, // 0DB2 ; UNKNOWN 5882 0x0DB3, // 0DB3..0DBB; SINHALA 5883 0x0DBC, // 0DBC ; UNKNOWN 5884 0x0DBD, // 0DBD ; SINHALA 5885 0x0DBE, // 0DBE..0DBF; UNKNOWN 5886 0x0DC0, // 0DC0..0DC6; SINHALA 5887 0x0DC7, // 0DC7..0DC9; UNKNOWN 5888 0x0DCA, // 0DCA ; SINHALA 5889 0x0DCB, // 0DCB..0DCE; UNKNOWN 5890 0x0DCF, // 0DCF..0DD4; SINHALA 5891 0x0DD5, // 0DD5 ; UNKNOWN 5892 0x0DD6, // 0DD6 ; SINHALA 5893 0x0DD7, // 0DD7 ; UNKNOWN 5894 0x0DD8, // 0DD8..0DDF; SINHALA 5895 0x0DE0, // 0DE0..0DE5; UNKNOWN 5896 0x0DE6, // 0DE6..0DEF; SINHALA 5897 0x0DF0, // 0DF0..0DF1; UNKNOWN 5898 0x0DF2, // 0DF2..0DF4; SINHALA 5899 0x0DF5, // 0DF5..0E00; UNKNOWN 5900 0x0E01, // 0E01..0E3A; THAI 5901 0x0E3B, // 0E3B..0E3E; UNKNOWN 5902 0x0E3F, // 0E3F ; COMMON 5903 
0x0E40, // 0E40..0E5B; THAI 5904 0x0E5C, // 0E5C..0E80; UNKNOWN 5905 0x0E81, // 0E81..0E82; LAO 5906 0x0E83, // 0E83 ; UNKNOWN 5907 0x0E84, // 0E84 ; LAO 5908 0x0E85, // 0E85 ; UNKNOWN 5909 0x0E86, // 0E86..0E8A; LAO 5910 0x0E8B, // 0E8B ; UNKNOWN 5911 0x0E8C, // 0E8C..0EA3; LAO 5912 0x0EA4, // 0EA4 ; UNKNOWN 5913 0x0EA5, // 0EA5 ; LAO 5914 0x0EA6, // 0EA6 ; UNKNOWN 5915 0x0EA7, // 0EA7..0EBD; LAO 5916 0x0EBE, // 0EBE..0EBF; UNKNOWN 5917 0x0EC0, // 0EC0..0EC4; LAO 5918 0x0EC5, // 0EC5 ; UNKNOWN 5919 0x0EC6, // 0EC6 ; LAO 5920 0x0EC7, // 0EC7 ; UNKNOWN 5921 0x0EC8, // 0EC8..0ECE; LAO 5922 0x0ECF, // 0ECF ; UNKNOWN 5923 0x0ED0, // 0ED0..0ED9; LAO 5924 0x0EDA, // 0EDA..0EDB; UNKNOWN 5925 0x0EDC, // 0EDC..0EDF; LAO 5926 0x0EE0, // 0EE0..0EFF; UNKNOWN 5927 0x0F00, // 0F00..0F47; TIBETAN 5928 0x0F48, // 0F48 ; UNKNOWN 5929 0x0F49, // 0F49..0F6C; TIBETAN 5930 0x0F6D, // 0F6D..0F70; UNKNOWN 5931 0x0F71, // 0F71..0F97; TIBETAN 5932 0x0F98, // 0F98 ; UNKNOWN 5933 0x0F99, // 0F99..0FBC; TIBETAN 5934 0x0FBD, // 0FBD ; UNKNOWN 5935 0x0FBE, // 0FBE..0FCC; TIBETAN 5936 0x0FCD, // 0FCD ; UNKNOWN 5937 0x0FCE, // 0FCE..0FD4; TIBETAN 5938 0x0FD5, // 0FD5..0FD8; COMMON 5939 0x0FD9, // 0FD9..0FDA; TIBETAN 5940 0x0FDB, // 0FDB..0FFF; UNKNOWN 5941 0x1000, // 1000..109F; MYANMAR 5942 0x10A0, // 10A0..10C5; GEORGIAN 5943 0x10C6, // 10C6 ; UNKNOWN 5944 0x10C7, // 10C7 ; GEORGIAN 5945 0x10C8, // 10C8..10CC; UNKNOWN 5946 0x10CD, // 10CD ; GEORGIAN 5947 0x10CE, // 10CE..10CF; UNKNOWN 5948 0x10D0, // 10D0..10FA; GEORGIAN 5949 0x10FB, // 10FB ; COMMON 5950 0x10FC, // 10FC..10FF; GEORGIAN 5951 0x1100, // 1100..11FF; HANGUL 5952 0x1200, // 1200..1248; ETHIOPIC 5953 0x1249, // 1249 ; UNKNOWN 5954 0x124A, // 124A..124D; ETHIOPIC 5955 0x124E, // 124E..124F; UNKNOWN 5956 0x1250, // 1250..1256; ETHIOPIC 5957 0x1257, // 1257 ; UNKNOWN 5958 0x1258, // 1258 ; ETHIOPIC 5959 0x1259, // 1259 ; UNKNOWN 5960 0x125A, // 125A..125D; ETHIOPIC 5961 0x125E, // 125E..125F; UNKNOWN 5962 0x1260, // 1260..1288; 
ETHIOPIC 5963 0x1289, // 1289 ; UNKNOWN 5964 0x128A, // 128A..128D; ETHIOPIC 5965 0x128E, // 128E..128F; UNKNOWN 5966 0x1290, // 1290..12B0; ETHIOPIC 5967 0x12B1, // 12B1 ; UNKNOWN 5968 0x12B2, // 12B2..12B5; ETHIOPIC 5969 0x12B6, // 12B6..12B7; UNKNOWN 5970 0x12B8, // 12B8..12BE; ETHIOPIC 5971 0x12BF, // 12BF ; UNKNOWN 5972 0x12C0, // 12C0 ; ETHIOPIC 5973 0x12C1, // 12C1 ; UNKNOWN 5974 0x12C2, // 12C2..12C5; ETHIOPIC 5975 0x12C6, // 12C6..12C7; UNKNOWN 5976 0x12C8, // 12C8..12D6; ETHIOPIC 5977 0x12D7, // 12D7 ; UNKNOWN 5978 0x12D8, // 12D8..1310; ETHIOPIC 5979 0x1311, // 1311 ; UNKNOWN 5980 0x1312, // 1312..1315; ETHIOPIC 5981 0x1316, // 1316..1317; UNKNOWN 5982 0x1318, // 1318..135A; ETHIOPIC 5983 0x135B, // 135B..135C; UNKNOWN 5984 0x135D, // 135D..137C; ETHIOPIC 5985 0x137D, // 137D..137F; UNKNOWN 5986 0x1380, // 1380..1399; ETHIOPIC 5987 0x139A, // 139A..139F; UNKNOWN 5988 0x13A0, // 13A0..13F5; CHEROKEE 5989 0x13F6, // 13F6..13F7; UNKNOWN 5990 0x13F8, // 13F8..13FD; CHEROKEE 5991 0x13FE, // 13FE..13FF; UNKNOWN 5992 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 5993 0x1680, // 1680..169C; OGHAM 5994 0x169D, // 169D..169F; UNKNOWN 5995 0x16A0, // 16A0..16EA; RUNIC 5996 0x16EB, // 16EB..16ED; COMMON 5997 0x16EE, // 16EE..16F8; RUNIC 5998 0x16F9, // 16F9..16FF; UNKNOWN 5999 0x1700, // 1700..1715; TAGALOG 6000 0x1716, // 1716..171E; UNKNOWN 6001 0x171F, // 171F ; TAGALOG 6002 0x1720, // 1720..1734; HANUNOO 6003 0x1735, // 1735..1736; COMMON 6004 0x1737, // 1737..173F; UNKNOWN 6005 0x1740, // 1740..1753; BUHID 6006 0x1754, // 1754..175F; UNKNOWN 6007 0x1760, // 1760..176C; TAGBANWA 6008 0x176D, // 176D ; UNKNOWN 6009 0x176E, // 176E..1770; TAGBANWA 6010 0x1771, // 1771 ; UNKNOWN 6011 0x1772, // 1772..1773; TAGBANWA 6012 0x1774, // 1774..177F; UNKNOWN 6013 0x1780, // 1780..17DD; KHMER 6014 0x17DE, // 17DE..17DF; UNKNOWN 6015 0x17E0, // 17E0..17E9; KHMER 6016 0x17EA, // 17EA..17EF; UNKNOWN 6017 0x17F0, // 17F0..17F9; KHMER 6018 0x17FA, // 17FA..17FF; UNKNOWN 6019 
0x1800, // 1800..1801; MONGOLIAN 6020 0x1802, // 1802..1803; COMMON 6021 0x1804, // 1804 ; MONGOLIAN 6022 0x1805, // 1805 ; COMMON 6023 0x1806, // 1806..1819; MONGOLIAN 6024 0x181A, // 181A..181F; UNKNOWN 6025 0x1820, // 1820..1878; MONGOLIAN 6026 0x1879, // 1879..187F; UNKNOWN 6027 0x1880, // 1880..18AA; MONGOLIAN 6028 0x18AB, // 18AB..18AF; UNKNOWN 6029 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL 6030 0x18F6, // 18F6..18FF; UNKNOWN 6031 0x1900, // 1900..191E; LIMBU 6032 0x191F, // 191F ; UNKNOWN 6033 0x1920, // 1920..192B; LIMBU 6034 0x192C, // 192C..192F; UNKNOWN 6035 0x1930, // 1930..193B; LIMBU 6036 0x193C, // 193C..193F; UNKNOWN 6037 0x1940, // 1940 ; LIMBU 6038 0x1941, // 1941..1943; UNKNOWN 6039 0x1944, // 1944..194F; LIMBU 6040 0x1950, // 1950..196D; TAI_LE 6041 0x196E, // 196E..196F; UNKNOWN 6042 0x1970, // 1970..1974; TAI_LE 6043 0x1975, // 1975..197F; UNKNOWN 6044 0x1980, // 1980..19AB; NEW_TAI_LUE 6045 0x19AC, // 19AC..19AF; UNKNOWN 6046 0x19B0, // 19B0..19C9; NEW_TAI_LUE 6047 0x19CA, // 19CA..19CF; UNKNOWN 6048 0x19D0, // 19D0..19DA; NEW_TAI_LUE 6049 0x19DB, // 19DB..19DD; UNKNOWN 6050 0x19DE, // 19DE..19DF; NEW_TAI_LUE 6051 0x19E0, // 19E0..19FF; KHMER 6052 0x1A00, // 1A00..1A1B; BUGINESE 6053 0x1A1C, // 1A1C..1A1D; UNKNOWN 6054 0x1A1E, // 1A1E..1A1F; BUGINESE 6055 0x1A20, // 1A20..1A5E; TAI_THAM 6056 0x1A5F, // 1A5F ; UNKNOWN 6057 0x1A60, // 1A60..1A7C; TAI_THAM 6058 0x1A7D, // 1A7D..1A7E; UNKNOWN 6059 0x1A7F, // 1A7F..1A89; TAI_THAM 6060 0x1A8A, // 1A8A..1A8F; UNKNOWN 6061 0x1A90, // 1A90..1A99; TAI_THAM 6062 0x1A9A, // 1A9A..1A9F; UNKNOWN 6063 0x1AA0, // 1AA0..1AAD; TAI_THAM 6064 0x1AAE, // 1AAE..1AAF; UNKNOWN 6065 0x1AB0, // 1AB0..1ACE; INHERITED 6066 0x1ACF, // 1ACF..1AFF; UNKNOWN 6067 0x1B00, // 1B00..1B4C; BALINESE 6068 0x1B4D, // 1B4D ; UNKNOWN 6069 0x1B4E, // 1B4E..1B7F; BALINESE 6070 0x1B80, // 1B80..1BBF; SUNDANESE 6071 0x1BC0, // 1BC0..1BF3; BATAK 6072 0x1BF4, // 1BF4..1BFB; UNKNOWN 6073 0x1BFC, // 1BFC..1BFF; BATAK 6074 0x1C00, // 
1C00..1C37; LEPCHA 6075 0x1C38, // 1C38..1C3A; UNKNOWN 6076 0x1C3B, // 1C3B..1C49; LEPCHA 6077 0x1C4A, // 1C4A..1C4C; UNKNOWN 6078 0x1C4D, // 1C4D..1C4F; LEPCHA 6079 0x1C50, // 1C50..1C7F; OL_CHIKI 6080 0x1C80, // 1C80..1C8A; CYRILLIC 6081 0x1C8B, // 1C8B..1C8F; UNKNOWN 6082 0x1C90, // 1C90..1CBA; GEORGIAN 6083 0x1CBB, // 1CBB..1CBC; UNKNOWN 6084 0x1CBD, // 1CBD..1CBF; GEORGIAN 6085 0x1CC0, // 1CC0..1CC7; SUNDANESE 6086 0x1CC8, // 1CC8..1CCF; UNKNOWN 6087 0x1CD0, // 1CD0..1CD2; INHERITED 6088 0x1CD3, // 1CD3 ; COMMON 6089 0x1CD4, // 1CD4..1CE0; INHERITED 6090 0x1CE1, // 1CE1 ; COMMON 6091 0x1CE2, // 1CE2..1CE8; INHERITED 6092 0x1CE9, // 1CE9..1CEC; COMMON 6093 0x1CED, // 1CED ; INHERITED 6094 0x1CEE, // 1CEE..1CF3; COMMON 6095 0x1CF4, // 1CF4 ; INHERITED 6096 0x1CF5, // 1CF5..1CF7; COMMON 6097 0x1CF8, // 1CF8..1CF9; INHERITED 6098 0x1CFA, // 1CFA ; COMMON 6099 0x1CFB, // 1CFB..1CFF; UNKNOWN 6100 0x1D00, // 1D00..1D25; LATIN 6101 0x1D26, // 1D26..1D2A; GREEK 6102 0x1D2B, // 1D2B ; CYRILLIC 6103 0x1D2C, // 1D2C..1D5C; LATIN 6104 0x1D5D, // 1D5D..1D61; GREEK 6105 0x1D62, // 1D62..1D65; LATIN 6106 0x1D66, // 1D66..1D6A; GREEK 6107 0x1D6B, // 1D6B..1D77; LATIN 6108 0x1D78, // 1D78 ; CYRILLIC 6109 0x1D79, // 1D79..1DBE; LATIN 6110 0x1DBF, // 1DBF ; GREEK 6111 0x1DC0, // 1DC0..1DFF; INHERITED 6112 0x1E00, // 1E00..1EFF; LATIN 6113 0x1F00, // 1F00..1F15; GREEK 6114 0x1F16, // 1F16..1F17; UNKNOWN 6115 0x1F18, // 1F18..1F1D; GREEK 6116 0x1F1E, // 1F1E..1F1F; UNKNOWN 6117 0x1F20, // 1F20..1F45; GREEK 6118 0x1F46, // 1F46..1F47; UNKNOWN 6119 0x1F48, // 1F48..1F4D; GREEK 6120 0x1F4E, // 1F4E..1F4F; UNKNOWN 6121 0x1F50, // 1F50..1F57; GREEK 6122 0x1F58, // 1F58 ; UNKNOWN 6123 0x1F59, // 1F59 ; GREEK 6124 0x1F5A, // 1F5A ; UNKNOWN 6125 0x1F5B, // 1F5B ; GREEK 6126 0x1F5C, // 1F5C ; UNKNOWN 6127 0x1F5D, // 1F5D ; GREEK 6128 0x1F5E, // 1F5E ; UNKNOWN 6129 0x1F5F, // 1F5F..1F7D; GREEK 6130 0x1F7E, // 1F7E..1F7F; UNKNOWN 6131 0x1F80, // 1F80..1FB4; GREEK 6132 0x1FB5, // 1FB5 ; 
UNKNOWN 6133 0x1FB6, // 1FB6..1FC4; GREEK 6134 0x1FC5, // 1FC5 ; UNKNOWN 6135 0x1FC6, // 1FC6..1FD3; GREEK 6136 0x1FD4, // 1FD4..1FD5; UNKNOWN 6137 0x1FD6, // 1FD6..1FDB; GREEK 6138 0x1FDC, // 1FDC ; UNKNOWN 6139 0x1FDD, // 1FDD..1FEF; GREEK 6140 0x1FF0, // 1FF0..1FF1; UNKNOWN 6141 0x1FF2, // 1FF2..1FF4; GREEK 6142 0x1FF5, // 1FF5 ; UNKNOWN 6143 0x1FF6, // 1FF6..1FFE; GREEK 6144 0x1FFF, // 1FFF ; UNKNOWN 6145 0x2000, // 2000..200B; COMMON 6146 0x200C, // 200C..200D; INHERITED 6147 0x200E, // 200E..2064; COMMON 6148 0x2065, // 2065 ; UNKNOWN 6149 0x2066, // 2066..2070; COMMON 6150 0x2071, // 2071 ; LATIN 6151 0x2072, // 2072..2073; UNKNOWN 6152 0x2074, // 2074..207E; COMMON 6153 0x207F, // 207F ; LATIN 6154 0x2080, // 2080..208E; COMMON 6155 0x208F, // 208F ; UNKNOWN 6156 0x2090, // 2090..209C; LATIN 6157 0x209D, // 209D..209F; UNKNOWN 6158 0x20A0, // 20A0..20C0; COMMON 6159 0x20C1, // 20C1..20CF; UNKNOWN 6160 0x20D0, // 20D0..20F0; INHERITED 6161 0x20F1, // 20F1..20FF; UNKNOWN 6162 0x2100, // 2100..2125; COMMON 6163 0x2126, // 2126 ; GREEK 6164 0x2127, // 2127..2129; COMMON 6165 0x212A, // 212A..212B; LATIN 6166 0x212C, // 212C..2131; COMMON 6167 0x2132, // 2132 ; LATIN 6168 0x2133, // 2133..214D; COMMON 6169 0x214E, // 214E ; LATIN 6170 0x214F, // 214F..215F; COMMON 6171 0x2160, // 2160..2188; LATIN 6172 0x2189, // 2189..218B; COMMON 6173 0x218C, // 218C..218F; UNKNOWN 6174 0x2190, // 2190..2429; COMMON 6175 0x242A, // 242A..243F; UNKNOWN 6176 0x2440, // 2440..244A; COMMON 6177 0x244B, // 244B..245F; UNKNOWN 6178 0x2460, // 2460..27FF; COMMON 6179 0x2800, // 2800..28FF; BRAILLE 6180 0x2900, // 2900..2B73; COMMON 6181 0x2B74, // 2B74..2B75; UNKNOWN 6182 0x2B76, // 2B76..2B95; COMMON 6183 0x2B96, // 2B96 ; UNKNOWN 6184 0x2B97, // 2B97..2BFF; COMMON 6185 0x2C00, // 2C00..2C5F; GLAGOLITIC 6186 0x2C60, // 2C60..2C7F; LATIN 6187 0x2C80, // 2C80..2CF3; COPTIC 6188 0x2CF4, // 2CF4..2CF8; UNKNOWN 6189 0x2CF9, // 2CF9..2CFF; COPTIC 6190 0x2D00, // 2D00..2D25; GEORGIAN 6191 
0x2D26, // 2D26 ; UNKNOWN 6192 0x2D27, // 2D27 ; GEORGIAN 6193 0x2D28, // 2D28..2D2C; UNKNOWN 6194 0x2D2D, // 2D2D ; GEORGIAN 6195 0x2D2E, // 2D2E..2D2F; UNKNOWN 6196 0x2D30, // 2D30..2D67; TIFINAGH 6197 0x2D68, // 2D68..2D6E; UNKNOWN 6198 0x2D6F, // 2D6F..2D70; TIFINAGH 6199 0x2D71, // 2D71..2D7E; UNKNOWN 6200 0x2D7F, // 2D7F ; TIFINAGH 6201 0x2D80, // 2D80..2D96; ETHIOPIC 6202 0x2D97, // 2D97..2D9F; UNKNOWN 6203 0x2DA0, // 2DA0..2DA6; ETHIOPIC 6204 0x2DA7, // 2DA7 ; UNKNOWN 6205 0x2DA8, // 2DA8..2DAE; ETHIOPIC 6206 0x2DAF, // 2DAF ; UNKNOWN 6207 0x2DB0, // 2DB0..2DB6; ETHIOPIC 6208 0x2DB7, // 2DB7 ; UNKNOWN 6209 0x2DB8, // 2DB8..2DBE; ETHIOPIC 6210 0x2DBF, // 2DBF ; UNKNOWN 6211 0x2DC0, // 2DC0..2DC6; ETHIOPIC 6212 0x2DC7, // 2DC7 ; UNKNOWN 6213 0x2DC8, // 2DC8..2DCE; ETHIOPIC 6214 0x2DCF, // 2DCF ; UNKNOWN 6215 0x2DD0, // 2DD0..2DD6; ETHIOPIC 6216 0x2DD7, // 2DD7 ; UNKNOWN 6217 0x2DD8, // 2DD8..2DDE; ETHIOPIC 6218 0x2DDF, // 2DDF ; UNKNOWN 6219 0x2DE0, // 2DE0..2DFF; CYRILLIC 6220 0x2E00, // 2E00..2E5D; COMMON 6221 0x2E5E, // 2E5E..2E7F; UNKNOWN 6222 0x2E80, // 2E80..2E99; HAN 6223 0x2E9A, // 2E9A ; UNKNOWN 6224 0x2E9B, // 2E9B..2EF3; HAN 6225 0x2EF4, // 2EF4..2EFF; UNKNOWN 6226 0x2F00, // 2F00..2FD5; HAN 6227 0x2FD6, // 2FD6..2FEF; UNKNOWN 6228 0x2FF0, // 2FF0..3004; COMMON 6229 0x3005, // 3005 ; HAN 6230 0x3006, // 3006 ; COMMON 6231 0x3007, // 3007 ; HAN 6232 0x3008, // 3008..3020; COMMON 6233 0x3021, // 3021..3029; HAN 6234 0x302A, // 302A..302D; INHERITED 6235 0x302E, // 302E..302F; HANGUL 6236 0x3030, // 3030..3037; COMMON 6237 0x3038, // 3038..303B; HAN 6238 0x303C, // 303C..303F; COMMON 6239 0x3040, // 3040 ; UNKNOWN 6240 0x3041, // 3041..3096; HIRAGANA 6241 0x3097, // 3097..3098; UNKNOWN 6242 0x3099, // 3099..309A; INHERITED 6243 0x309B, // 309B..309C; COMMON 6244 0x309D, // 309D..309F; HIRAGANA 6245 0x30A0, // 30A0 ; COMMON 6246 0x30A1, // 30A1..30FA; KATAKANA 6247 0x30FB, // 30FB..30FC; COMMON 6248 0x30FD, // 30FD..30FF; KATAKANA 6249 0x3100, // 
3100..3104; UNKNOWN 6250 0x3105, // 3105..312F; BOPOMOFO 6251 0x3130, // 3130 ; UNKNOWN 6252 0x3131, // 3131..318E; HANGUL 6253 0x318F, // 318F ; UNKNOWN 6254 0x3190, // 3190..319F; COMMON 6255 0x31A0, // 31A0..31BF; BOPOMOFO 6256 0x31C0, // 31C0..31E5; COMMON 6257 0x31E6, // 31E6..31EE; UNKNOWN 6258 0x31EF, // 31EF ; COMMON 6259 0x31F0, // 31F0..31FF; KATAKANA 6260 0x3200, // 3200..321E; HANGUL 6261 0x321F, // 321F ; UNKNOWN 6262 0x3220, // 3220..325F; COMMON 6263 0x3260, // 3260..327E; HANGUL 6264 0x327F, // 327F..32CF; COMMON 6265 0x32D0, // 32D0..32FE; KATAKANA 6266 0x32FF, // 32FF ; COMMON 6267 0x3300, // 3300..3357; KATAKANA 6268 0x3358, // 3358..33FF; COMMON 6269 0x3400, // 3400..4DBF; HAN 6270 0x4DC0, // 4DC0..4DFF; COMMON 6271 0x4E00, // 4E00..9FFF; HAN 6272 0xA000, // A000..A48C; YI 6273 0xA48D, // A48D..A48F; UNKNOWN 6274 0xA490, // A490..A4C6; YI 6275 0xA4C7, // A4C7..A4CF; UNKNOWN 6276 0xA4D0, // A4D0..A4FF; LISU 6277 0xA500, // A500..A62B; VAI 6278 0xA62C, // A62C..A63F; UNKNOWN 6279 0xA640, // A640..A69F; CYRILLIC 6280 0xA6A0, // A6A0..A6F7; BAMUM 6281 0xA6F8, // A6F8..A6FF; UNKNOWN 6282 0xA700, // A700..A721; COMMON 6283 0xA722, // A722..A787; LATIN 6284 0xA788, // A788..A78A; COMMON 6285 0xA78B, // A78B..A7CD; LATIN 6286 0xA7CE, // A7CE..A7CF; UNKNOWN 6287 0xA7D0, // A7D0..A7D1; LATIN 6288 0xA7D2, // A7D2 ; UNKNOWN 6289 0xA7D3, // A7D3 ; LATIN 6290 0xA7D4, // A7D4 ; UNKNOWN 6291 0xA7D5, // A7D5..A7DC; LATIN 6292 0xA7DD, // A7DD..A7F1; UNKNOWN 6293 0xA7F2, // A7F2..A7FF; LATIN 6294 0xA800, // A800..A82C; SYLOTI_NAGRI 6295 0xA82D, // A82D..A82F; UNKNOWN 6296 0xA830, // A830..A839; COMMON 6297 0xA83A, // A83A..A83F; UNKNOWN 6298 0xA840, // A840..A877; PHAGS_PA 6299 0xA878, // A878..A87F; UNKNOWN 6300 0xA880, // A880..A8C5; SAURASHTRA 6301 0xA8C6, // A8C6..A8CD; UNKNOWN 6302 0xA8CE, // A8CE..A8D9; SAURASHTRA 6303 0xA8DA, // A8DA..A8DF; UNKNOWN 6304 0xA8E0, // A8E0..A8FF; DEVANAGARI 6305 0xA900, // A900..A92D; KAYAH_LI 6306 0xA92E, // A92E ; COMMON 6307 
0xA92F, // A92F ; KAYAH_LI 6308 0xA930, // A930..A953; REJANG 6309 0xA954, // A954..A95E; UNKNOWN 6310 0xA95F, // A95F ; REJANG 6311 0xA960, // A960..A97C; HANGUL 6312 0xA97D, // A97D..A97F; UNKNOWN 6313 0xA980, // A980..A9CD; JAVANESE 6314 0xA9CE, // A9CE ; UNKNOWN 6315 0xA9CF, // A9CF ; COMMON 6316 0xA9D0, // A9D0..A9D9; JAVANESE 6317 0xA9DA, // A9DA..A9DD; UNKNOWN 6318 0xA9DE, // A9DE..A9DF; JAVANESE 6319 0xA9E0, // A9E0..A9FE; MYANMAR 6320 0xA9FF, // A9FF ; UNKNOWN 6321 0xAA00, // AA00..AA36; CHAM 6322 0xAA37, // AA37..AA3F; UNKNOWN 6323 0xAA40, // AA40..AA4D; CHAM 6324 0xAA4E, // AA4E..AA4F; UNKNOWN 6325 0xAA50, // AA50..AA59; CHAM 6326 0xAA5A, // AA5A..AA5B; UNKNOWN 6327 0xAA5C, // AA5C..AA5F; CHAM 6328 0xAA60, // AA60..AA7F; MYANMAR 6329 0xAA80, // AA80..AAC2; TAI_VIET 6330 0xAAC3, // AAC3..AADA; UNKNOWN 6331 0xAADB, // AADB..AADF; TAI_VIET 6332 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK 6333 0xAAF7, // AAF7..AB00; UNKNOWN 6334 0xAB01, // AB01..AB06; ETHIOPIC 6335 0xAB07, // AB07..AB08; UNKNOWN 6336 0xAB09, // AB09..AB0E; ETHIOPIC 6337 0xAB0F, // AB0F..AB10; UNKNOWN 6338 0xAB11, // AB11..AB16; ETHIOPIC 6339 0xAB17, // AB17..AB1F; UNKNOWN 6340 0xAB20, // AB20..AB26; ETHIOPIC 6341 0xAB27, // AB27 ; UNKNOWN 6342 0xAB28, // AB28..AB2E; ETHIOPIC 6343 0xAB2F, // AB2F ; UNKNOWN 6344 0xAB30, // AB30..AB5A; LATIN 6345 0xAB5B, // AB5B ; COMMON 6346 0xAB5C, // AB5C..AB64; LATIN 6347 0xAB65, // AB65 ; GREEK 6348 0xAB66, // AB66..AB69; LATIN 6349 0xAB6A, // AB6A..AB6B; COMMON 6350 0xAB6C, // AB6C..AB6F; UNKNOWN 6351 0xAB70, // AB70..ABBF; CHEROKEE 6352 0xABC0, // ABC0..ABED; MEETEI_MAYEK 6353 0xABEE, // ABEE..ABEF; UNKNOWN 6354 0xABF0, // ABF0..ABF9; MEETEI_MAYEK 6355 0xABFA, // ABFA..ABFF; UNKNOWN 6356 0xAC00, // AC00..D7A3; HANGUL 6357 0xD7A4, // D7A4..D7AF; UNKNOWN 6358 0xD7B0, // D7B0..D7C6; HANGUL 6359 0xD7C7, // D7C7..D7CA; UNKNOWN 6360 0xD7CB, // D7CB..D7FB; HANGUL 6361 0xD7FC, // D7FC..F8FF; UNKNOWN 6362 0xF900, // F900..FA6D; HAN 6363 0xFA6E, // FA6E..FA6F; UNKNOWN 
6364 0xFA70, // FA70..FAD9; HAN 6365 0xFADA, // FADA..FAFF; UNKNOWN 6366 0xFB00, // FB00..FB06; LATIN 6367 0xFB07, // FB07..FB12; UNKNOWN 6368 0xFB13, // FB13..FB17; ARMENIAN 6369 0xFB18, // FB18..FB1C; UNKNOWN 6370 0xFB1D, // FB1D..FB36; HEBREW 6371 0xFB37, // FB37 ; UNKNOWN 6372 0xFB38, // FB38..FB3C; HEBREW 6373 0xFB3D, // FB3D ; UNKNOWN 6374 0xFB3E, // FB3E ; HEBREW 6375 0xFB3F, // FB3F ; UNKNOWN 6376 0xFB40, // FB40..FB41; HEBREW 6377 0xFB42, // FB42 ; UNKNOWN 6378 0xFB43, // FB43..FB44; HEBREW 6379 0xFB45, // FB45 ; UNKNOWN 6380 0xFB46, // FB46..FB4F; HEBREW 6381 0xFB50, // FB50..FBC2; ARABIC 6382 0xFBC3, // FBC3..FBD2; UNKNOWN 6383 0xFBD3, // FBD3..FD3D; ARABIC 6384 0xFD3E, // FD3E..FD3F; COMMON 6385 0xFD40, // FD40..FD8F; ARABIC 6386 0xFD90, // FD90..FD91; UNKNOWN 6387 0xFD92, // FD92..FDC7; ARABIC 6388 0xFDC8, // FDC8..FDCE; UNKNOWN 6389 0xFDCF, // FDCF ; ARABIC 6390 0xFDD0, // FDD0..FDEF; UNKNOWN 6391 0xFDF0, // FDF0..FDFF; ARABIC 6392 0xFE00, // FE00..FE0F; INHERITED 6393 0xFE10, // FE10..FE19; COMMON 6394 0xFE1A, // FE1A..FE1F; UNKNOWN 6395 0xFE20, // FE20..FE2D; INHERITED 6396 0xFE2E, // FE2E..FE2F; CYRILLIC 6397 0xFE30, // FE30..FE52; COMMON 6398 0xFE53, // FE53 ; UNKNOWN 6399 0xFE54, // FE54..FE66; COMMON 6400 0xFE67, // FE67 ; UNKNOWN 6401 0xFE68, // FE68..FE6B; COMMON 6402 0xFE6C, // FE6C..FE6F; UNKNOWN 6403 0xFE70, // FE70..FE74; ARABIC 6404 0xFE75, // FE75 ; UNKNOWN 6405 0xFE76, // FE76..FEFC; ARABIC 6406 0xFEFD, // FEFD..FEFE; UNKNOWN 6407 0xFEFF, // FEFF ; COMMON 6408 0xFF00, // FF00 ; UNKNOWN 6409 0xFF01, // FF01..FF20; COMMON 6410 0xFF21, // FF21..FF3A; LATIN 6411 0xFF3B, // FF3B..FF40; COMMON 6412 0xFF41, // FF41..FF5A; LATIN 6413 0xFF5B, // FF5B..FF65; COMMON 6414 0xFF66, // FF66..FF6F; KATAKANA 6415 0xFF70, // FF70 ; COMMON 6416 0xFF71, // FF71..FF9D; KATAKANA 6417 0xFF9E, // FF9E..FF9F; COMMON 6418 0xFFA0, // FFA0..FFBE; HANGUL 6419 0xFFBF, // FFBF..FFC1; UNKNOWN 6420 0xFFC2, // FFC2..FFC7; HANGUL 6421 0xFFC8, // FFC8..FFC9; UNKNOWN 6422 
0xFFCA, // FFCA..FFCF; HANGUL 6423 0xFFD0, // FFD0..FFD1; UNKNOWN 6424 0xFFD2, // FFD2..FFD7; HANGUL 6425 0xFFD8, // FFD8..FFD9; UNKNOWN 6426 0xFFDA, // FFDA..FFDC; HANGUL 6427 0xFFDD, // FFDD..FFDF; UNKNOWN 6428 0xFFE0, // FFE0..FFE6; COMMON 6429 0xFFE7, // FFE7 ; UNKNOWN 6430 0xFFE8, // FFE8..FFEE; COMMON 6431 0xFFEF, // FFEF..FFF8; UNKNOWN 6432 0xFFF9, // FFF9..FFFD; COMMON 6433 0xFFFE, // FFFE..FFFF; UNKNOWN 6434 0x10000, // 10000..1000B; LINEAR_B 6435 0x1000C, // 1000C ; UNKNOWN 6436 0x1000D, // 1000D..10026; LINEAR_B 6437 0x10027, // 10027 ; UNKNOWN 6438 0x10028, // 10028..1003A; LINEAR_B 6439 0x1003B, // 1003B ; UNKNOWN 6440 0x1003C, // 1003C..1003D; LINEAR_B 6441 0x1003E, // 1003E ; UNKNOWN 6442 0x1003F, // 1003F..1004D; LINEAR_B 6443 0x1004E, // 1004E..1004F; UNKNOWN 6444 0x10050, // 10050..1005D; LINEAR_B 6445 0x1005E, // 1005E..1007F; UNKNOWN 6446 0x10080, // 10080..100FA; LINEAR_B 6447 0x100FB, // 100FB..100FF; UNKNOWN 6448 0x10100, // 10100..10102; COMMON 6449 0x10103, // 10103..10106; UNKNOWN 6450 0x10107, // 10107..10133; COMMON 6451 0x10134, // 10134..10136; UNKNOWN 6452 0x10137, // 10137..1013F; COMMON 6453 0x10140, // 10140..1018E; GREEK 6454 0x1018F, // 1018F ; UNKNOWN 6455 0x10190, // 10190..1019C; COMMON 6456 0x1019D, // 1019D..1019F; UNKNOWN 6457 0x101A0, // 101A0 ; GREEK 6458 0x101A1, // 101A1..101CF; UNKNOWN 6459 0x101D0, // 101D0..101FC; COMMON 6460 0x101FD, // 101FD ; INHERITED 6461 0x101FE, // 101FE..1027F; UNKNOWN 6462 0x10280, // 10280..1029C; LYCIAN 6463 0x1029D, // 1029D..1029F; UNKNOWN 6464 0x102A0, // 102A0..102D0; CARIAN 6465 0x102D1, // 102D1..102DF; UNKNOWN 6466 0x102E0, // 102E0 ; INHERITED 6467 0x102E1, // 102E1..102FB; COMMON 6468 0x102FC, // 102FC..102FF; UNKNOWN 6469 0x10300, // 10300..10323; OLD_ITALIC 6470 0x10324, // 10324..1032C; UNKNOWN 6471 0x1032D, // 1032D..1032F; OLD_ITALIC 6472 0x10330, // 10330..1034A; GOTHIC 6473 0x1034B, // 1034B..1034F; UNKNOWN 6474 0x10350, // 10350..1037A; OLD_PERMIC 6475 0x1037B, // 
1037B..1037F; UNKNOWN 6476 0x10380, // 10380..1039D; UGARITIC 6477 0x1039E, // 1039E ; UNKNOWN 6478 0x1039F, // 1039F ; UGARITIC 6479 0x103A0, // 103A0..103C3; OLD_PERSIAN 6480 0x103C4, // 103C4..103C7; UNKNOWN 6481 0x103C8, // 103C8..103D5; OLD_PERSIAN 6482 0x103D6, // 103D6..103FF; UNKNOWN 6483 0x10400, // 10400..1044F; DESERET 6484 0x10450, // 10450..1047F; SHAVIAN 6485 0x10480, // 10480..1049D; OSMANYA 6486 0x1049E, // 1049E..1049F; UNKNOWN 6487 0x104A0, // 104A0..104A9; OSMANYA 6488 0x104AA, // 104AA..104AF; UNKNOWN 6489 0x104B0, // 104B0..104D3; OSAGE 6490 0x104D4, // 104D4..104D7; UNKNOWN 6491 0x104D8, // 104D8..104FB; OSAGE 6492 0x104FC, // 104FC..104FF; UNKNOWN 6493 0x10500, // 10500..10527; ELBASAN 6494 0x10528, // 10528..1052F; UNKNOWN 6495 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN 6496 0x10564, // 10564..1056E; UNKNOWN 6497 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN 6498 0x10570, // 10570..1057A; VITHKUQI 6499 0x1057B, // 1057B ; UNKNOWN 6500 0x1057C, // 1057C..1058A; VITHKUQI 6501 0x1058B, // 1058B ; UNKNOWN 6502 0x1058C, // 1058C..10592; VITHKUQI 6503 0x10593, // 10593 ; UNKNOWN 6504 0x10594, // 10594..10595; VITHKUQI 6505 0x10596, // 10596 ; UNKNOWN 6506 0x10597, // 10597..105A1; VITHKUQI 6507 0x105A2, // 105A2 ; UNKNOWN 6508 0x105A3, // 105A3..105B1; VITHKUQI 6509 0x105B2, // 105B2 ; UNKNOWN 6510 0x105B3, // 105B3..105B9; VITHKUQI 6511 0x105BA, // 105BA ; UNKNOWN 6512 0x105BB, // 105BB..105BC; VITHKUQI 6513 0x105BD, // 105BD..105BF; UNKNOWN 6514 0x105C0, // 105C0..105F3; TODHRI 6515 0x105F4, // 105F4..105FF; UNKNOWN 6516 0x10600, // 10600..10736; LINEAR_A 6517 0x10737, // 10737..1073F; UNKNOWN 6518 0x10740, // 10740..10755; LINEAR_A 6519 0x10756, // 10756..1075F; UNKNOWN 6520 0x10760, // 10760..10767; LINEAR_A 6521 0x10768, // 10768..1077F; UNKNOWN 6522 0x10780, // 10780..10785; LATIN 6523 0x10786, // 10786 ; UNKNOWN 6524 0x10787, // 10787..107B0; LATIN 6525 0x107B1, // 107B1 ; UNKNOWN 6526 0x107B2, // 107B2..107BA; LATIN 6527 0x107BB, // 
107BB..107FF; UNKNOWN 6528 0x10800, // 10800..10805; CYPRIOT 6529 0x10806, // 10806..10807; UNKNOWN 6530 0x10808, // 10808 ; CYPRIOT 6531 0x10809, // 10809 ; UNKNOWN 6532 0x1080A, // 1080A..10835; CYPRIOT 6533 0x10836, // 10836 ; UNKNOWN 6534 0x10837, // 10837..10838; CYPRIOT 6535 0x10839, // 10839..1083B; UNKNOWN 6536 0x1083C, // 1083C ; CYPRIOT 6537 0x1083D, // 1083D..1083E; UNKNOWN 6538 0x1083F, // 1083F ; CYPRIOT 6539 0x10840, // 10840..10855; IMPERIAL_ARAMAIC 6540 0x10856, // 10856 ; UNKNOWN 6541 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC 6542 0x10860, // 10860..1087F; PALMYRENE 6543 0x10880, // 10880..1089E; NABATAEAN 6544 0x1089F, // 1089F..108A6; UNKNOWN 6545 0x108A7, // 108A7..108AF; NABATAEAN 6546 0x108B0, // 108B0..108DF; UNKNOWN 6547 0x108E0, // 108E0..108F2; HATRAN 6548 0x108F3, // 108F3 ; UNKNOWN 6549 0x108F4, // 108F4..108F5; HATRAN 6550 0x108F6, // 108F6..108FA; UNKNOWN 6551 0x108FB, // 108FB..108FF; HATRAN 6552 0x10900, // 10900..1091B; PHOENICIAN 6553 0x1091C, // 1091C..1091E; UNKNOWN 6554 0x1091F, // 1091F ; PHOENICIAN 6555 0x10920, // 10920..10939; LYDIAN 6556 0x1093A, // 1093A..1093E; UNKNOWN 6557 0x1093F, // 1093F ; LYDIAN 6558 0x10940, // 10940..1097F; UNKNOWN 6559 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 6560 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE 6561 0x109B8, // 109B8..109BB; UNKNOWN 6562 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE 6563 0x109D0, // 109D0..109D1; UNKNOWN 6564 0x109D2, // 109D2..109FF; MEROITIC_CURSIVE 6565 0x10A00, // 10A00..10A03; KHAROSHTHI 6566 0x10A04, // 10A04 ; UNKNOWN 6567 0x10A05, // 10A05..10A06; KHAROSHTHI 6568 0x10A07, // 10A07..10A0B; UNKNOWN 6569 0x10A0C, // 10A0C..10A13; KHAROSHTHI 6570 0x10A14, // 10A14 ; UNKNOWN 6571 0x10A15, // 10A15..10A17; KHAROSHTHI 6572 0x10A18, // 10A18 ; UNKNOWN 6573 0x10A19, // 10A19..10A35; KHAROSHTHI 6574 0x10A36, // 10A36..10A37; UNKNOWN 6575 0x10A38, // 10A38..10A3A; KHAROSHTHI 6576 0x10A3B, // 10A3B..10A3E; UNKNOWN 6577 0x10A3F, // 10A3F..10A48; KHAROSHTHI 6578 0x10A49, 
// 10A49..10A4F; UNKNOWN 6579 0x10A50, // 10A50..10A58; KHAROSHTHI 6580 0x10A59, // 10A59..10A5F; UNKNOWN 6581 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN 6582 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN 6583 0x10AA0, // 10AA0..10ABF; UNKNOWN 6584 0x10AC0, // 10AC0..10AE6; MANICHAEAN 6585 0x10AE7, // 10AE7..10AEA; UNKNOWN 6586 0x10AEB, // 10AEB..10AF6; MANICHAEAN 6587 0x10AF7, // 10AF7..10AFF; UNKNOWN 6588 0x10B00, // 10B00..10B35; AVESTAN 6589 0x10B36, // 10B36..10B38; UNKNOWN 6590 0x10B39, // 10B39..10B3F; AVESTAN 6591 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN 6592 0x10B56, // 10B56..10B57; UNKNOWN 6593 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN 6594 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI 6595 0x10B73, // 10B73..10B77; UNKNOWN 6596 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI 6597 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI 6598 0x10B92, // 10B92..10B98; UNKNOWN 6599 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI 6600 0x10B9D, // 10B9D..10BA8; UNKNOWN 6601 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI 6602 0x10BB0, // 10BB0..10BFF; UNKNOWN 6603 0x10C00, // 10C00..10C48; OLD_TURKIC 6604 0x10C49, // 10C49..10C7F; UNKNOWN 6605 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN 6606 0x10CB3, // 10CB3..10CBF; UNKNOWN 6607 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN 6608 0x10CF3, // 10CF3..10CF9; UNKNOWN 6609 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN 6610 0x10D00, // 10D00..10D27; HANIFI_ROHINGYA 6611 0x10D28, // 10D28..10D2F; UNKNOWN 6612 0x10D30, // 10D30..10D39; HANIFI_ROHINGYA 6613 0x10D3A, // 10D3A..10D3F; UNKNOWN 6614 0x10D40, // 10D40..10D65; GARAY 6615 0x10D66, // 10D66..10D68; UNKNOWN 6616 0x10D69, // 10D69..10D85; GARAY 6617 0x10D86, // 10D86..10D8D; UNKNOWN 6618 0x10D8E, // 10D8E..10D8F; GARAY 6619 0x10D90, // 10D90..10E5F; UNKNOWN 6620 0x10E60, // 10E60..10E7E; ARABIC 6621 0x10E7F, // 10E7F ; UNKNOWN 6622 0x10E80, // 10E80..10EA9; YEZIDI 6623 0x10EAA, // 10EAA ; UNKNOWN 6624 0x10EAB, // 10EAB..10EAD; YEZIDI 6625 0x10EAE, // 10EAE..10EAF; UNKNOWN 6626 0x10EB0, 
// 10EB0..10EB1; YEZIDI 6627 0x10EB2, // 10EB2..10EC1; UNKNOWN 6628 0x10EC2, // 10EC2..10EC4; ARABIC 6629 0x10EC5, // 10EC5..10EFB; UNKNOWN 6630 0x10EFC, // 10EFC..10EFF; ARABIC 6631 0x10F00, // 10F00..10F27; OLD_SOGDIAN 6632 0x10F28, // 10F28..10F2F; UNKNOWN 6633 0x10F30, // 10F30..10F59; SOGDIAN 6634 0x10F5A, // 10F5A..10F6F; UNKNOWN 6635 0x10F70, // 10F70..10F89; OLD_UYGHUR 6636 0x10F8A, // 10F8A..10FAF; UNKNOWN 6637 0x10FB0, // 10FB0..10FCB; CHORASMIAN 6638 0x10FCC, // 10FCC..10FDF; UNKNOWN 6639 0x10FE0, // 10FE0..10FF6; ELYMAIC 6640 0x10FF7, // 10FF7..10FFF; UNKNOWN 6641 0x11000, // 11000..1104D; BRAHMI 6642 0x1104E, // 1104E..11051; UNKNOWN 6643 0x11052, // 11052..11075; BRAHMI 6644 0x11076, // 11076..1107E; UNKNOWN 6645 0x1107F, // 1107F ; BRAHMI 6646 0x11080, // 11080..110C2; KAITHI 6647 0x110C3, // 110C3..110CC; UNKNOWN 6648 0x110CD, // 110CD ; KAITHI 6649 0x110CE, // 110CE..110CF; UNKNOWN 6650 0x110D0, // 110D0..110E8; SORA_SOMPENG 6651 0x110E9, // 110E9..110EF; UNKNOWN 6652 0x110F0, // 110F0..110F9; SORA_SOMPENG 6653 0x110FA, // 110FA..110FF; UNKNOWN 6654 0x11100, // 11100..11134; CHAKMA 6655 0x11135, // 11135 ; UNKNOWN 6656 0x11136, // 11136..11147; CHAKMA 6657 0x11148, // 11148..1114F; UNKNOWN 6658 0x11150, // 11150..11176; MAHAJANI 6659 0x11177, // 11177..1117F; UNKNOWN 6660 0x11180, // 11180..111DF; SHARADA 6661 0x111E0, // 111E0 ; UNKNOWN 6662 0x111E1, // 111E1..111F4; SINHALA 6663 0x111F5, // 111F5..111FF; UNKNOWN 6664 0x11200, // 11200..11211; KHOJKI 6665 0x11212, // 11212 ; UNKNOWN 6666 0x11213, // 11213..11241; KHOJKI 6667 0x11242, // 11242..1127F; UNKNOWN 6668 0x11280, // 11280..11286; MULTANI 6669 0x11287, // 11287 ; UNKNOWN 6670 0x11288, // 11288 ; MULTANI 6671 0x11289, // 11289 ; UNKNOWN 6672 0x1128A, // 1128A..1128D; MULTANI 6673 0x1128E, // 1128E ; UNKNOWN 6674 0x1128F, // 1128F..1129D; MULTANI 6675 0x1129E, // 1129E ; UNKNOWN 6676 0x1129F, // 1129F..112A9; MULTANI 6677 0x112AA, // 112AA..112AF; UNKNOWN 6678 0x112B0, // 112B0..112EA; 
KHUDAWADI 6679 0x112EB, // 112EB..112EF; UNKNOWN 6680 0x112F0, // 112F0..112F9; KHUDAWADI 6681 0x112FA, // 112FA..112FF; UNKNOWN 6682 0x11300, // 11300..11303; GRANTHA 6683 0x11304, // 11304 ; UNKNOWN 6684 0x11305, // 11305..1130C; GRANTHA 6685 0x1130D, // 1130D..1130E; UNKNOWN 6686 0x1130F, // 1130F..11310; GRANTHA 6687 0x11311, // 11311..11312; UNKNOWN 6688 0x11313, // 11313..11328; GRANTHA 6689 0x11329, // 11329 ; UNKNOWN 6690 0x1132A, // 1132A..11330; GRANTHA 6691 0x11331, // 11331 ; UNKNOWN 6692 0x11332, // 11332..11333; GRANTHA 6693 0x11334, // 11334 ; UNKNOWN 6694 0x11335, // 11335..11339; GRANTHA 6695 0x1133A, // 1133A ; UNKNOWN 6696 0x1133B, // 1133B ; INHERITED 6697 0x1133C, // 1133C..11344; GRANTHA 6698 0x11345, // 11345..11346; UNKNOWN 6699 0x11347, // 11347..11348; GRANTHA 6700 0x11349, // 11349..1134A; UNKNOWN 6701 0x1134B, // 1134B..1134D; GRANTHA 6702 0x1134E, // 1134E..1134F; UNKNOWN 6703 0x11350, // 11350 ; GRANTHA 6704 0x11351, // 11351..11356; UNKNOWN 6705 0x11357, // 11357 ; GRANTHA 6706 0x11358, // 11358..1135C; UNKNOWN 6707 0x1135D, // 1135D..11363; GRANTHA 6708 0x11364, // 11364..11365; UNKNOWN 6709 0x11366, // 11366..1136C; GRANTHA 6710 0x1136D, // 1136D..1136F; UNKNOWN 6711 0x11370, // 11370..11374; GRANTHA 6712 0x11375, // 11375..1137F; UNKNOWN 6713 0x11380, // 11380..11389; TULU_TIGALARI 6714 0x1138A, // 1138A ; UNKNOWN 6715 0x1138B, // 1138B ; TULU_TIGALARI 6716 0x1138C, // 1138C..1138D; UNKNOWN 6717 0x1138E, // 1138E ; TULU_TIGALARI 6718 0x1138F, // 1138F ; UNKNOWN 6719 0x11390, // 11390..113B5; TULU_TIGALARI 6720 0x113B6, // 113B6 ; UNKNOWN 6721 0x113B7, // 113B7..113C0; TULU_TIGALARI 6722 0x113C1, // 113C1 ; UNKNOWN 6723 0x113C2, // 113C2 ; TULU_TIGALARI 6724 0x113C3, // 113C3..113C4; UNKNOWN 6725 0x113C5, // 113C5 ; TULU_TIGALARI 6726 0x113C6, // 113C6 ; UNKNOWN 6727 0x113C7, // 113C7..113CA; TULU_TIGALARI 6728 0x113CB, // 113CB ; UNKNOWN 6729 0x113CC, // 113CC..113D5; TULU_TIGALARI 6730 0x113D6, // 113D6 ; UNKNOWN 6731 0x113D7, // 
113D7..113D8; TULU_TIGALARI 6732 0x113D9, // 113D9..113E0; UNKNOWN 6733 0x113E1, // 113E1..113E2; TULU_TIGALARI 6734 0x113E3, // 113E3..113FF; UNKNOWN 6735 0x11400, // 11400..1145B; NEWA 6736 0x1145C, // 1145C ; UNKNOWN 6737 0x1145D, // 1145D..11461; NEWA 6738 0x11462, // 11462..1147F; UNKNOWN 6739 0x11480, // 11480..114C7; TIRHUTA 6740 0x114C8, // 114C8..114CF; UNKNOWN 6741 0x114D0, // 114D0..114D9; TIRHUTA 6742 0x114DA, // 114DA..1157F; UNKNOWN 6743 0x11580, // 11580..115B5; SIDDHAM 6744 0x115B6, // 115B6..115B7; UNKNOWN 6745 0x115B8, // 115B8..115DD; SIDDHAM 6746 0x115DE, // 115DE..115FF; UNKNOWN 6747 0x11600, // 11600..11644; MODI 6748 0x11645, // 11645..1164F; UNKNOWN 6749 0x11650, // 11650..11659; MODI 6750 0x1165A, // 1165A..1165F; UNKNOWN 6751 0x11660, // 11660..1166C; MONGOLIAN 6752 0x1166D, // 1166D..1167F; UNKNOWN 6753 0x11680, // 11680..116B9; TAKRI 6754 0x116BA, // 116BA..116BF; UNKNOWN 6755 0x116C0, // 116C0..116C9; TAKRI 6756 0x116CA, // 116CA..116CF; UNKNOWN 6757 0x116D0, // 116D0..116E3; MYANMAR 6758 0x116E4, // 116E4..116FF; UNKNOWN 6759 0x11700, // 11700..1171A; AHOM 6760 0x1171B, // 1171B..1171C; UNKNOWN 6761 0x1171D, // 1171D..1172B; AHOM 6762 0x1172C, // 1172C..1172F; UNKNOWN 6763 0x11730, // 11730..11746; AHOM 6764 0x11747, // 11747..117FF; UNKNOWN 6765 0x11800, // 11800..1183B; DOGRA 6766 0x1183C, // 1183C..1189F; UNKNOWN 6767 0x118A0, // 118A0..118F2; WARANG_CITI 6768 0x118F3, // 118F3..118FE; UNKNOWN 6769 0x118FF, // 118FF ; WARANG_CITI 6770 0x11900, // 11900..11906; DIVES_AKURU 6771 0x11907, // 11907..11908; UNKNOWN 6772 0x11909, // 11909 ; DIVES_AKURU 6773 0x1190A, // 1190A..1190B; UNKNOWN 6774 0x1190C, // 1190C..11913; DIVES_AKURU 6775 0x11914, // 11914 ; UNKNOWN 6776 0x11915, // 11915..11916; DIVES_AKURU 6777 0x11917, // 11917 ; UNKNOWN 6778 0x11918, // 11918..11935; DIVES_AKURU 6779 0x11936, // 11936 ; UNKNOWN 6780 0x11937, // 11937..11938; DIVES_AKURU 6781 0x11939, // 11939..1193A; UNKNOWN 6782 0x1193B, // 1193B..11946; DIVES_AKURU 
6783 0x11947, // 11947..1194F; UNKNOWN 6784 0x11950, // 11950..11959; DIVES_AKURU 6785 0x1195A, // 1195A..1199F; UNKNOWN 6786 0x119A0, // 119A0..119A7; NANDINAGARI 6787 0x119A8, // 119A8..119A9; UNKNOWN 6788 0x119AA, // 119AA..119D7; NANDINAGARI 6789 0x119D8, // 119D8..119D9; UNKNOWN 6790 0x119DA, // 119DA..119E4; NANDINAGARI 6791 0x119E5, // 119E5..119FF; UNKNOWN 6792 0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE 6793 0x11A48, // 11A48..11A4F; UNKNOWN 6794 0x11A50, // 11A50..11AA2; SOYOMBO 6795 0x11AA3, // 11AA3..11AAF; UNKNOWN 6796 0x11AB0, // 11AB0..11ABF; CANADIAN_ABORIGINAL 6797 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU 6798 0x11AF9, // 11AF9..11AFF; UNKNOWN 6799 0x11B00, // 11B00..11B09; DEVANAGARI 6800 0x11B0A, // 11B0A..11BBF; UNKNOWN 6801 0x11BC0, // 11BC0..11BE1; SUNUWAR 6802 0x11BE2, // 11BE2..11BEF; UNKNOWN 6803 0x11BF0, // 11BF0..11BF9; SUNUWAR 6804 0x11BFA, // 11BFA..11BFF; UNKNOWN 6805 0x11C00, // 11C00..11C08; BHAIKSUKI 6806 0x11C09, // 11C09 ; UNKNOWN 6807 0x11C0A, // 11C0A..11C36; BHAIKSUKI 6808 0x11C37, // 11C37 ; UNKNOWN 6809 0x11C38, // 11C38..11C45; BHAIKSUKI 6810 0x11C46, // 11C46..11C4F; UNKNOWN 6811 0x11C50, // 11C50..11C6C; BHAIKSUKI 6812 0x11C6D, // 11C6D..11C6F; UNKNOWN 6813 0x11C70, // 11C70..11C8F; MARCHEN 6814 0x11C90, // 11C90..11C91; UNKNOWN 6815 0x11C92, // 11C92..11CA7; MARCHEN 6816 0x11CA8, // 11CA8 ; UNKNOWN 6817 0x11CA9, // 11CA9..11CB6; MARCHEN 6818 0x11CB7, // 11CB7..11CFF; UNKNOWN 6819 0x11D00, // 11D00..11D06; MASARAM_GONDI 6820 0x11D07, // 11D07 ; UNKNOWN 6821 0x11D08, // 11D08..11D09; MASARAM_GONDI 6822 0x11D0A, // 11D0A ; UNKNOWN 6823 0x11D0B, // 11D0B..11D36; MASARAM_GONDI 6824 0x11D37, // 11D37..11D39; UNKNOWN 6825 0x11D3A, // 11D3A ; MASARAM_GONDI 6826 0x11D3B, // 11D3B ; UNKNOWN 6827 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI 6828 0x11D3E, // 11D3E ; UNKNOWN 6829 0x11D3F, // 11D3F..11D47; MASARAM_GONDI 6830 0x11D48, // 11D48..11D4F; UNKNOWN 6831 0x11D50, // 11D50..11D59; MASARAM_GONDI 6832 0x11D5A, // 11D5A..11D5F; UNKNOWN 
6833 0x11D60, // 11D60..11D65; GUNJALA_GONDI 6834 0x11D66, // 11D66 ; UNKNOWN 6835 0x11D67, // 11D67..11D68; GUNJALA_GONDI 6836 0x11D69, // 11D69 ; UNKNOWN 6837 0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI 6838 0x11D8F, // 11D8F ; UNKNOWN 6839 0x11D90, // 11D90..11D91; GUNJALA_GONDI 6840 0x11D92, // 11D92 ; UNKNOWN 6841 0x11D93, // 11D93..11D98; GUNJALA_GONDI 6842 0x11D99, // 11D99..11D9F; UNKNOWN 6843 0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI 6844 0x11DAA, // 11DAA..11EDF; UNKNOWN 6845 0x11EE0, // 11EE0..11EF8; MAKASAR 6846 0x11EF9, // 11EF9..11EFF; UNKNOWN 6847 0x11F00, // 11F00..11F10; KAWI 6848 0x11F11, // 11F11 ; UNKNOWN 6849 0x11F12, // 11F12..11F3A; KAWI 6850 0x11F3B, // 11F3B..11F3D; UNKNOWN 6851 0x11F3E, // 11F3E..11F5A; KAWI 6852 0x11F5B, // 11F5B..11FAF; UNKNOWN 6853 0x11FB0, // 11FB0 ; LISU 6854 0x11FB1, // 11FB1..11FBF; UNKNOWN 6855 0x11FC0, // 11FC0..11FF1; TAMIL 6856 0x11FF2, // 11FF2..11FFE; UNKNOWN 6857 0x11FFF, // 11FFF ; TAMIL 6858 0x12000, // 12000..12399; CUNEIFORM 6859 0x1239A, // 1239A..123FF; UNKNOWN 6860 0x12400, // 12400..1246E; CUNEIFORM 6861 0x1246F, // 1246F ; UNKNOWN 6862 0x12470, // 12470..12474; CUNEIFORM 6863 0x12475, // 12475..1247F; UNKNOWN 6864 0x12480, // 12480..12543; CUNEIFORM 6865 0x12544, // 12544..12F8F; UNKNOWN 6866 0x12F90, // 12F90..12FF2; CYPRO_MINOAN 6867 0x12FF3, // 12FF3..12FFF; UNKNOWN 6868 0x13000, // 13000..13455; EGYPTIAN_HIEROGLYPHS 6869 0x13456, // 13456..1345F; UNKNOWN 6870 0x13460, // 13460..143FA; EGYPTIAN_HIEROGLYPHS 6871 0x143FB, // 143FB..143FF; UNKNOWN 6872 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS 6873 0x14647, // 14647..160FF; UNKNOWN 6874 0x16100, // 16100..16139; GURUNG_KHEMA 6875 0x1613A, // 1613A..167FF; UNKNOWN 6876 0x16800, // 16800..16A38; BAMUM 6877 0x16A39, // 16A39..16A3F; UNKNOWN 6878 0x16A40, // 16A40..16A5E; MRO 6879 0x16A5F, // 16A5F ; UNKNOWN 6880 0x16A60, // 16A60..16A69; MRO 6881 0x16A6A, // 16A6A..16A6D; UNKNOWN 6882 0x16A6E, // 16A6E..16A6F; MRO 6883 0x16A70, // 16A70..16ABE; TANGSA 
6884 0x16ABF, // 16ABF ; UNKNOWN 6885 0x16AC0, // 16AC0..16AC9; TANGSA 6886 0x16ACA, // 16ACA..16ACF; UNKNOWN 6887 0x16AD0, // 16AD0..16AED; BASSA_VAH 6888 0x16AEE, // 16AEE..16AEF; UNKNOWN 6889 0x16AF0, // 16AF0..16AF5; BASSA_VAH 6890 0x16AF6, // 16AF6..16AFF; UNKNOWN 6891 0x16B00, // 16B00..16B45; PAHAWH_HMONG 6892 0x16B46, // 16B46..16B4F; UNKNOWN 6893 0x16B50, // 16B50..16B59; PAHAWH_HMONG 6894 0x16B5A, // 16B5A ; UNKNOWN 6895 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG 6896 0x16B62, // 16B62 ; UNKNOWN 6897 0x16B63, // 16B63..16B77; PAHAWH_HMONG 6898 0x16B78, // 16B78..16B7C; UNKNOWN 6899 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG 6900 0x16B90, // 16B90..16D3F; UNKNOWN 6901 0x16D40, // 16D40..16D79; KIRAT_RAI 6902 0x16D7A, // 16D7A..16E3F; UNKNOWN 6903 0x16E40, // 16E40..16E9A; MEDEFAIDRIN 6904 0x16E9B, // 16E9B..16EFF; UNKNOWN 6905 0x16F00, // 16F00..16F4A; MIAO 6906 0x16F4B, // 16F4B..16F4E; UNKNOWN 6907 0x16F4F, // 16F4F..16F87; MIAO 6908 0x16F88, // 16F88..16F8E; UNKNOWN 6909 0x16F8F, // 16F8F..16F9F; MIAO 6910 0x16FA0, // 16FA0..16FDF; UNKNOWN 6911 0x16FE0, // 16FE0 ; TANGUT 6912 0x16FE1, // 16FE1 ; NUSHU 6913 0x16FE2, // 16FE2..16FE3; HAN 6914 0x16FE4, // 16FE4 ; KHITAN_SMALL_SCRIPT 6915 0x16FE5, // 16FE5..16FEF; UNKNOWN 6916 0x16FF0, // 16FF0..16FF1; HAN 6917 0x16FF2, // 16FF2..16FFF; UNKNOWN 6918 0x17000, // 17000..187F7; TANGUT 6919 0x187F8, // 187F8..187FF; UNKNOWN 6920 0x18800, // 18800..18AFF; TANGUT 6921 0x18B00, // 18B00..18CD5; KHITAN_SMALL_SCRIPT 6922 0x18CD6, // 18CD6..18CFE; UNKNOWN 6923 0x18CFF, // 18CFF ; KHITAN_SMALL_SCRIPT 6924 0x18D00, // 18D00..18D08; TANGUT 6925 0x18D09, // 18D09..1AFEF; UNKNOWN 6926 0x1AFF0, // 1AFF0..1AFF3; KATAKANA 6927 0x1AFF4, // 1AFF4 ; UNKNOWN 6928 0x1AFF5, // 1AFF5..1AFFB; KATAKANA 6929 0x1AFFC, // 1AFFC ; UNKNOWN 6930 0x1AFFD, // 1AFFD..1AFFE; KATAKANA 6931 0x1AFFF, // 1AFFF ; UNKNOWN 6932 0x1B000, // 1B000 ; KATAKANA 6933 0x1B001, // 1B001..1B11F; HIRAGANA 6934 0x1B120, // 1B120..1B122; KATAKANA 6935 0x1B123, // 
1B123..1B131; UNKNOWN 6936 0x1B132, // 1B132 ; HIRAGANA 6937 0x1B133, // 1B133..1B14F; UNKNOWN 6938 0x1B150, // 1B150..1B152; HIRAGANA 6939 0x1B153, // 1B153..1B154; UNKNOWN 6940 0x1B155, // 1B155 ; KATAKANA 6941 0x1B156, // 1B156..1B163; UNKNOWN 6942 0x1B164, // 1B164..1B167; KATAKANA 6943 0x1B168, // 1B168..1B16F; UNKNOWN 6944 0x1B170, // 1B170..1B2FB; NUSHU 6945 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN 6946 0x1BC00, // 1BC00..1BC6A; DUPLOYAN 6947 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN 6948 0x1BC70, // 1BC70..1BC7C; DUPLOYAN 6949 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN 6950 0x1BC80, // 1BC80..1BC88; DUPLOYAN 6951 0x1BC89, // 1BC89..1BC8F; UNKNOWN 6952 0x1BC90, // 1BC90..1BC99; DUPLOYAN 6953 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN 6954 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN 6955 0x1BCA0, // 1BCA0..1BCA3; COMMON 6956 0x1BCA4, // 1BCA4..1CBFF; UNKNOWN 6957 0x1CC00, // 1CC00..1CCF9; COMMON 6958 0x1CCFA, // 1CCFA..1CCFF; UNKNOWN 6959 0x1CD00, // 1CD00..1CEB3; COMMON 6960 0x1CEB4, // 1CEB4..1CEFF; UNKNOWN 6961 0x1CF00, // 1CF00..1CF2D; INHERITED 6962 0x1CF2E, // 1CF2E..1CF2F; UNKNOWN 6963 0x1CF30, // 1CF30..1CF46; INHERITED 6964 0x1CF47, // 1CF47..1CF4F; UNKNOWN 6965 0x1CF50, // 1CF50..1CFC3; COMMON 6966 0x1CFC4, // 1CFC4..1CFFF; UNKNOWN 6967 0x1D000, // 1D000..1D0F5; COMMON 6968 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN 6969 0x1D100, // 1D100..1D126; COMMON 6970 0x1D127, // 1D127..1D128; UNKNOWN 6971 0x1D129, // 1D129..1D166; COMMON 6972 0x1D167, // 1D167..1D169; INHERITED 6973 0x1D16A, // 1D16A..1D17A; COMMON 6974 0x1D17B, // 1D17B..1D182; INHERITED 6975 0x1D183, // 1D183..1D184; COMMON 6976 0x1D185, // 1D185..1D18B; INHERITED 6977 0x1D18C, // 1D18C..1D1A9; COMMON 6978 0x1D1AA, // 1D1AA..1D1AD; INHERITED 6979 0x1D1AE, // 1D1AE..1D1EA; COMMON 6980 0x1D1EB, // 1D1EB..1D1FF; UNKNOWN 6981 0x1D200, // 1D200..1D245; GREEK 6982 0x1D246, // 1D246..1D2BF; UNKNOWN 6983 0x1D2C0, // 1D2C0..1D2D3; COMMON 6984 0x1D2D4, // 1D2D4..1D2DF; UNKNOWN 6985 0x1D2E0, // 1D2E0..1D2F3; COMMON 6986 0x1D2F4, // 1D2F4..1D2FF; 
UNKNOWN 6987 0x1D300, // 1D300..1D356; COMMON 6988 0x1D357, // 1D357..1D35F; UNKNOWN 6989 0x1D360, // 1D360..1D378; COMMON 6990 0x1D379, // 1D379..1D3FF; UNKNOWN 6991 0x1D400, // 1D400..1D454; COMMON 6992 0x1D455, // 1D455 ; UNKNOWN 6993 0x1D456, // 1D456..1D49C; COMMON 6994 0x1D49D, // 1D49D ; UNKNOWN 6995 0x1D49E, // 1D49E..1D49F; COMMON 6996 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN 6997 0x1D4A2, // 1D4A2 ; COMMON 6998 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN 6999 0x1D4A5, // 1D4A5..1D4A6; COMMON 7000 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN 7001 0x1D4A9, // 1D4A9..1D4AC; COMMON 7002 0x1D4AD, // 1D4AD ; UNKNOWN 7003 0x1D4AE, // 1D4AE..1D4B9; COMMON 7004 0x1D4BA, // 1D4BA ; UNKNOWN 7005 0x1D4BB, // 1D4BB ; COMMON 7006 0x1D4BC, // 1D4BC ; UNKNOWN 7007 0x1D4BD, // 1D4BD..1D4C3; COMMON 7008 0x1D4C4, // 1D4C4 ; UNKNOWN 7009 0x1D4C5, // 1D4C5..1D505; COMMON 7010 0x1D506, // 1D506 ; UNKNOWN 7011 0x1D507, // 1D507..1D50A; COMMON 7012 0x1D50B, // 1D50B..1D50C; UNKNOWN 7013 0x1D50D, // 1D50D..1D514; COMMON 7014 0x1D515, // 1D515 ; UNKNOWN 7015 0x1D516, // 1D516..1D51C; COMMON 7016 0x1D51D, // 1D51D ; UNKNOWN 7017 0x1D51E, // 1D51E..1D539; COMMON 7018 0x1D53A, // 1D53A ; UNKNOWN 7019 0x1D53B, // 1D53B..1D53E; COMMON 7020 0x1D53F, // 1D53F ; UNKNOWN 7021 0x1D540, // 1D540..1D544; COMMON 7022 0x1D545, // 1D545 ; UNKNOWN 7023 0x1D546, // 1D546 ; COMMON 7024 0x1D547, // 1D547..1D549; UNKNOWN 7025 0x1D54A, // 1D54A..1D550; COMMON 7026 0x1D551, // 1D551 ; UNKNOWN 7027 0x1D552, // 1D552..1D6A5; COMMON 7028 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN 7029 0x1D6A8, // 1D6A8..1D7CB; COMMON 7030 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN 7031 0x1D7CE, // 1D7CE..1D7FF; COMMON 7032 0x1D800, // 1D800..1DA8B; SIGNWRITING 7033 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN 7034 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING 7035 0x1DAA0, // 1DAA0 ; UNKNOWN 7036 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING 7037 0x1DAB0, // 1DAB0..1DEFF; UNKNOWN 7038 0x1DF00, // 1DF00..1DF1E; LATIN 7039 0x1DF1F, // 1DF1F..1DF24; UNKNOWN 7040 0x1DF25, // 1DF25..1DF2A; LATIN 
7041 0x1DF2B, // 1DF2B..1DFFF; UNKNOWN 7042 0x1E000, // 1E000..1E006; GLAGOLITIC 7043 0x1E007, // 1E007 ; UNKNOWN 7044 0x1E008, // 1E008..1E018; GLAGOLITIC 7045 0x1E019, // 1E019..1E01A; UNKNOWN 7046 0x1E01B, // 1E01B..1E021; GLAGOLITIC 7047 0x1E022, // 1E022 ; UNKNOWN 7048 0x1E023, // 1E023..1E024; GLAGOLITIC 7049 0x1E025, // 1E025 ; UNKNOWN 7050 0x1E026, // 1E026..1E02A; GLAGOLITIC 7051 0x1E02B, // 1E02B..1E02F; UNKNOWN 7052 0x1E030, // 1E030..1E06D; CYRILLIC 7053 0x1E06E, // 1E06E..1E08E; UNKNOWN 7054 0x1E08F, // 1E08F ; CYRILLIC 7055 0x1E090, // 1E090..1E0FF; UNKNOWN 7056 0x1E100, // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG 7057 0x1E12D, // 1E12D..1E12F; UNKNOWN 7058 0x1E130, // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG 7059 0x1E13E, // 1E13E..1E13F; UNKNOWN 7060 0x1E140, // 1E140..1E149; NYIAKENG_PUACHUE_HMONG 7061 0x1E14A, // 1E14A..1E14D; UNKNOWN 7062 0x1E14E, // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG 7063 0x1E150, // 1E150..1E28F; UNKNOWN 7064 0x1E290, // 1E290..1E2AE; TOTO 7065 0x1E2AF, // 1E2AF..1E2BF; UNKNOWN 7066 0x1E2C0, // 1E2C0..1E2F9; WANCHO 7067 0x1E2FA, // 1E2FA..1E2FE; UNKNOWN 7068 0x1E2FF, // 1E2FF ; WANCHO 7069 0x1E300, // 1E300..1E4CF; UNKNOWN 7070 0x1E4D0, // 1E4D0..1E4F9; NAG_MUNDARI 7071 0x1E4FA, // 1E4FA..1E5CF; UNKNOWN 7072 0x1E5D0, // 1E5D0..1E5FA; OL_ONAL 7073 0x1E5FB, // 1E5FB..1E5FE; UNKNOWN 7074 0x1E5FF, // 1E5FF ; OL_ONAL 7075 0x1E600, // 1E600..1E7DF; UNKNOWN 7076 0x1E7E0, // 1E7E0..1E7E6; ETHIOPIC 7077 0x1E7E7, // 1E7E7 ; UNKNOWN 7078 0x1E7E8, // 1E7E8..1E7EB; ETHIOPIC 7079 0x1E7EC, // 1E7EC ; UNKNOWN 7080 0x1E7ED, // 1E7ED..1E7EE; ETHIOPIC 7081 0x1E7EF, // 1E7EF ; UNKNOWN 7082 0x1E7F0, // 1E7F0..1E7FE; ETHIOPIC 7083 0x1E7FF, // 1E7FF ; UNKNOWN 7084 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI 7085 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN 7086 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI 7087 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN 7088 0x1E900, // 1E900..1E94B; ADLAM 7089 0x1E94C, // 1E94C..1E94F; UNKNOWN 7090 0x1E950, // 1E950..1E959; ADLAM 7091 0x1E95A, // 
1E95A..1E95D; UNKNOWN 7092 0x1E95E, // 1E95E..1E95F; ADLAM 7093 0x1E960, // 1E960..1EC70; UNKNOWN 7094 0x1EC71, // 1EC71..1ECB4; COMMON 7095 0x1ECB5, // 1ECB5..1ED00; UNKNOWN 7096 0x1ED01, // 1ED01..1ED3D; COMMON 7097 0x1ED3E, // 1ED3E..1EDFF; UNKNOWN 7098 0x1EE00, // 1EE00..1EE03; ARABIC 7099 0x1EE04, // 1EE04 ; UNKNOWN 7100 0x1EE05, // 1EE05..1EE1F; ARABIC 7101 0x1EE20, // 1EE20 ; UNKNOWN 7102 0x1EE21, // 1EE21..1EE22; ARABIC 7103 0x1EE23, // 1EE23 ; UNKNOWN 7104 0x1EE24, // 1EE24 ; ARABIC 7105 0x1EE25, // 1EE25..1EE26; UNKNOWN 7106 0x1EE27, // 1EE27 ; ARABIC 7107 0x1EE28, // 1EE28 ; UNKNOWN 7108 0x1EE29, // 1EE29..1EE32; ARABIC 7109 0x1EE33, // 1EE33 ; UNKNOWN 7110 0x1EE34, // 1EE34..1EE37; ARABIC 7111 0x1EE38, // 1EE38 ; UNKNOWN 7112 0x1EE39, // 1EE39 ; ARABIC 7113 0x1EE3A, // 1EE3A ; UNKNOWN 7114 0x1EE3B, // 1EE3B ; ARABIC 7115 0x1EE3C, // 1EE3C..1EE41; UNKNOWN 7116 0x1EE42, // 1EE42 ; ARABIC 7117 0x1EE43, // 1EE43..1EE46; UNKNOWN 7118 0x1EE47, // 1EE47 ; ARABIC 7119 0x1EE48, // 1EE48 ; UNKNOWN 7120 0x1EE49, // 1EE49 ; ARABIC 7121 0x1EE4A, // 1EE4A ; UNKNOWN 7122 0x1EE4B, // 1EE4B ; ARABIC 7123 0x1EE4C, // 1EE4C ; UNKNOWN 7124 0x1EE4D, // 1EE4D..1EE4F; ARABIC 7125 0x1EE50, // 1EE50 ; UNKNOWN 7126 0x1EE51, // 1EE51..1EE52; ARABIC 7127 0x1EE53, // 1EE53 ; UNKNOWN 7128 0x1EE54, // 1EE54 ; ARABIC 7129 0x1EE55, // 1EE55..1EE56; UNKNOWN 7130 0x1EE57, // 1EE57 ; ARABIC 7131 0x1EE58, // 1EE58 ; UNKNOWN 7132 0x1EE59, // 1EE59 ; ARABIC 7133 0x1EE5A, // 1EE5A ; UNKNOWN 7134 0x1EE5B, // 1EE5B ; ARABIC 7135 0x1EE5C, // 1EE5C ; UNKNOWN 7136 0x1EE5D, // 1EE5D ; ARABIC 7137 0x1EE5E, // 1EE5E ; UNKNOWN 7138 0x1EE5F, // 1EE5F ; ARABIC 7139 0x1EE60, // 1EE60 ; UNKNOWN 7140 0x1EE61, // 1EE61..1EE62; ARABIC 7141 0x1EE63, // 1EE63 ; UNKNOWN 7142 0x1EE64, // 1EE64 ; ARABIC 7143 0x1EE65, // 1EE65..1EE66; UNKNOWN 7144 0x1EE67, // 1EE67..1EE6A; ARABIC 7145 0x1EE6B, // 1EE6B ; UNKNOWN 7146 0x1EE6C, // 1EE6C..1EE72; ARABIC 7147 0x1EE73, // 1EE73 ; UNKNOWN 7148 0x1EE74, // 1EE74..1EE77; 
ARABIC 7149 0x1EE78, // 1EE78 ; UNKNOWN 7150 0x1EE79, // 1EE79..1EE7C; ARABIC 7151 0x1EE7D, // 1EE7D ; UNKNOWN 7152 0x1EE7E, // 1EE7E ; ARABIC 7153 0x1EE7F, // 1EE7F ; UNKNOWN 7154 0x1EE80, // 1EE80..1EE89; ARABIC 7155 0x1EE8A, // 1EE8A ; UNKNOWN 7156 0x1EE8B, // 1EE8B..1EE9B; ARABIC 7157 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN 7158 0x1EEA1, // 1EEA1..1EEA3; ARABIC 7159 0x1EEA4, // 1EEA4 ; UNKNOWN 7160 0x1EEA5, // 1EEA5..1EEA9; ARABIC 7161 0x1EEAA, // 1EEAA ; UNKNOWN 7162 0x1EEAB, // 1EEAB..1EEBB; ARABIC 7163 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN 7164 0x1EEF0, // 1EEF0..1EEF1; ARABIC 7165 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN 7166 0x1F000, // 1F000..1F02B; COMMON 7167 0x1F02C, // 1F02C..1F02F; UNKNOWN 7168 0x1F030, // 1F030..1F093; COMMON 7169 0x1F094, // 1F094..1F09F; UNKNOWN 7170 0x1F0A0, // 1F0A0..1F0AE; COMMON 7171 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN 7172 0x1F0B1, // 1F0B1..1F0BF; COMMON 7173 0x1F0C0, // 1F0C0 ; UNKNOWN 7174 0x1F0C1, // 1F0C1..1F0CF; COMMON 7175 0x1F0D0, // 1F0D0 ; UNKNOWN 7176 0x1F0D1, // 1F0D1..1F0F5; COMMON 7177 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN 7178 0x1F100, // 1F100..1F1AD; COMMON 7179 0x1F1AE, // 1F1AE..1F1E5; UNKNOWN 7180 0x1F1E6, // 1F1E6..1F1FF; COMMON 7181 0x1F200, // 1F200 ; HIRAGANA 7182 0x1F201, // 1F201..1F202; COMMON 7183 0x1F203, // 1F203..1F20F; UNKNOWN 7184 0x1F210, // 1F210..1F23B; COMMON 7185 0x1F23C, // 1F23C..1F23F; UNKNOWN 7186 0x1F240, // 1F240..1F248; COMMON 7187 0x1F249, // 1F249..1F24F; UNKNOWN 7188 0x1F250, // 1F250..1F251; COMMON 7189 0x1F252, // 1F252..1F25F; UNKNOWN 7190 0x1F260, // 1F260..1F265; COMMON 7191 0x1F266, // 1F266..1F2FF; UNKNOWN 7192 0x1F300, // 1F300..1F6D7; COMMON 7193 0x1F6D8, // 1F6D8..1F6DB; UNKNOWN 7194 0x1F6DC, // 1F6DC..1F6EC; COMMON 7195 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN 7196 0x1F6F0, // 1F6F0..1F6FC; COMMON 7197 0x1F6FD, // 1F6FD..1F6FF; UNKNOWN 7198 0x1F700, // 1F700..1F776; COMMON 7199 0x1F777, // 1F777..1F77A; UNKNOWN 7200 0x1F77B, // 1F77B..1F7D9; COMMON 7201 0x1F7DA, // 1F7DA..1F7DF; UNKNOWN 7202 
0x1F7E0, // 1F7E0..1F7EB; COMMON 7203 0x1F7EC, // 1F7EC..1F7EF; UNKNOWN 7204 0x1F7F0, // 1F7F0 ; COMMON 7205 0x1F7F1, // 1F7F1..1F7FF; UNKNOWN 7206 0x1F800, // 1F800..1F80B; COMMON 7207 0x1F80C, // 1F80C..1F80F; UNKNOWN 7208 0x1F810, // 1F810..1F847; COMMON 7209 0x1F848, // 1F848..1F84F; UNKNOWN 7210 0x1F850, // 1F850..1F859; COMMON 7211 0x1F85A, // 1F85A..1F85F; UNKNOWN 7212 0x1F860, // 1F860..1F887; COMMON 7213 0x1F888, // 1F888..1F88F; UNKNOWN 7214 0x1F890, // 1F890..1F8AD; COMMON 7215 0x1F8AE, // 1F8AE..1F8AF; UNKNOWN 7216 0x1F8B0, // 1F8B0..1F8BB; COMMON 7217 0x1F8BC, // 1F8BC..1F8BF; UNKNOWN 7218 0x1F8C0, // 1F8C0..1F8C1; COMMON 7219 0x1F8C2, // 1F8C2..1F8FF; UNKNOWN 7220 0x1F900, // 1F900..1FA53; COMMON 7221 0x1FA54, // 1FA54..1FA5F; UNKNOWN 7222 0x1FA60, // 1FA60..1FA6D; COMMON 7223 0x1FA6E, // 1FA6E..1FA6F; UNKNOWN 7224 0x1FA70, // 1FA70..1FA7C; COMMON 7225 0x1FA7D, // 1FA7D..1FA7F; UNKNOWN 7226 0x1FA80, // 1FA80..1FA89; COMMON 7227 0x1FA8A, // 1FA8A..1FA8E; UNKNOWN 7228 0x1FA8F, // 1FA8F..1FAC6; COMMON 7229 0x1FAC7, // 1FAC7..1FACD; UNKNOWN 7230 0x1FACE, // 1FACE..1FADC; COMMON 7231 0x1FADD, // 1FADD..1FADE; UNKNOWN 7232 0x1FADF, // 1FADF..1FAE9; COMMON 7233 0x1FAEA, // 1FAEA..1FAEF; UNKNOWN 7234 0x1FAF0, // 1FAF0..1FAF8; COMMON 7235 0x1FAF9, // 1FAF9..1FAFF; UNKNOWN 7236 0x1FB00, // 1FB00..1FB92; COMMON 7237 0x1FB93, // 1FB93 ; UNKNOWN 7238 0x1FB94, // 1FB94..1FBF9; COMMON 7239 0x1FBFA, // 1FBFA..1FFFF; UNKNOWN 7240 0x20000, // 20000..2A6DF; HAN 7241 0x2A6E0, // 2A6E0..2A6FF; UNKNOWN 7242 0x2A700, // 2A700..2B739; HAN 7243 0x2B73A, // 2B73A..2B73F; UNKNOWN 7244 0x2B740, // 2B740..2B81D; HAN 7245 0x2B81E, // 2B81E..2B81F; UNKNOWN 7246 0x2B820, // 2B820..2CEA1; HAN 7247 0x2CEA2, // 2CEA2..2CEAF; UNKNOWN 7248 0x2CEB0, // 2CEB0..2EBE0; HAN 7249 0x2EBE1, // 2EBE1..2EBEF; UNKNOWN 7250 0x2EBF0, // 2EBF0..2EE5D; HAN 7251 0x2EE5E, // 2EE5E..2F7FF; UNKNOWN 7252 0x2F800, // 2F800..2FA1D; HAN 7253 0x2FA1E, // 2FA1E..2FFFF; UNKNOWN 7254 0x30000, // 30000..3134A; HAN 
7255 0x3134B, // 3134B..3134F; UNKNOWN 7256 0x31350, // 31350..323AF; HAN 7257 0x323B0, // 323B0..E0000; UNKNOWN 7258 0xE0001, // E0001 ; COMMON 7259 0xE0002, // E0002..E001F; UNKNOWN 7260 0xE0020, // E0020..E007F; COMMON 7261 0xE0080, // E0080..E00FF; UNKNOWN 7262 0xE0100, // E0100..E01EF; INHERITED 7263 0xE01F0, // E01F0..10FFFF; UNKNOWN 7264 }; 7265 7266 private static final UnicodeScript[] scripts = { 7267 COMMON, // 0000..0040 7268 LATIN, // 0041..005A 7269 COMMON, // 005B..0060 7270 LATIN, // 0061..007A 7271 COMMON, // 007B..00A9 7272 LATIN, // 00AA 7273 COMMON, // 00AB..00B9 7274 LATIN, // 00BA 7275 COMMON, // 00BB..00BF 7276 LATIN, // 00C0..00D6 7277 COMMON, // 00D7 7278 LATIN, // 00D8..00F6 7279 COMMON, // 00F7 7280 LATIN, // 00F8..02B8 7281 COMMON, // 02B9..02DF 7282 LATIN, // 02E0..02E4 7283 COMMON, // 02E5..02E9 7284 BOPOMOFO, // 02EA..02EB 7285 COMMON, // 02EC..02FF 7286 INHERITED, // 0300..036F 7287 GREEK, // 0370..0373 7288 COMMON, // 0374 7289 GREEK, // 0375..0377 7290 UNKNOWN, // 0378..0379 7291 GREEK, // 037A..037D 7292 COMMON, // 037E 7293 GREEK, // 037F 7294 UNKNOWN, // 0380..0383 7295 GREEK, // 0384 7296 COMMON, // 0385 7297 GREEK, // 0386 7298 COMMON, // 0387 7299 GREEK, // 0388..038A 7300 UNKNOWN, // 038B 7301 GREEK, // 038C 7302 UNKNOWN, // 038D 7303 GREEK, // 038E..03A1 7304 UNKNOWN, // 03A2 7305 GREEK, // 03A3..03E1 7306 COPTIC, // 03E2..03EF 7307 GREEK, // 03F0..03FF 7308 CYRILLIC, // 0400..0484 7309 INHERITED, // 0485..0486 7310 CYRILLIC, // 0487..052F 7311 UNKNOWN, // 0530 7312 ARMENIAN, // 0531..0556 7313 UNKNOWN, // 0557..0558 7314 ARMENIAN, // 0559..058A 7315 UNKNOWN, // 058B..058C 7316 ARMENIAN, // 058D..058F 7317 UNKNOWN, // 0590 7318 HEBREW, // 0591..05C7 7319 UNKNOWN, // 05C8..05CF 7320 HEBREW, // 05D0..05EA 7321 UNKNOWN, // 05EB..05EE 7322 HEBREW, // 05EF..05F4 7323 UNKNOWN, // 05F5..05FF 7324 ARABIC, // 0600..0604 7325 COMMON, // 0605 7326 ARABIC, // 0606..060B 7327 COMMON, // 060C 7328 ARABIC, // 060D..061A 7329 COMMON, // 
061B 7330 ARABIC, // 061C..061E 7331 COMMON, // 061F 7332 ARABIC, // 0620..063F 7333 COMMON, // 0640 7334 ARABIC, // 0641..064A 7335 INHERITED, // 064B..0655 7336 ARABIC, // 0656..066F 7337 INHERITED, // 0670 7338 ARABIC, // 0671..06DC 7339 COMMON, // 06DD 7340 ARABIC, // 06DE..06FF 7341 SYRIAC, // 0700..070D 7342 UNKNOWN, // 070E 7343 SYRIAC, // 070F..074A 7344 UNKNOWN, // 074B..074C 7345 SYRIAC, // 074D..074F 7346 ARABIC, // 0750..077F 7347 THAANA, // 0780..07B1 7348 UNKNOWN, // 07B2..07BF 7349 NKO, // 07C0..07FA 7350 UNKNOWN, // 07FB..07FC 7351 NKO, // 07FD..07FF 7352 SAMARITAN, // 0800..082D 7353 UNKNOWN, // 082E..082F 7354 SAMARITAN, // 0830..083E 7355 UNKNOWN, // 083F 7356 MANDAIC, // 0840..085B 7357 UNKNOWN, // 085C..085D 7358 MANDAIC, // 085E 7359 UNKNOWN, // 085F 7360 SYRIAC, // 0860..086A 7361 UNKNOWN, // 086B..086F 7362 ARABIC, // 0870..088E 7363 UNKNOWN, // 088F 7364 ARABIC, // 0890..0891 7365 UNKNOWN, // 0892..0896 7366 ARABIC, // 0897..08E1 7367 COMMON, // 08E2 7368 ARABIC, // 08E3..08FF 7369 DEVANAGARI, // 0900..0950 7370 INHERITED, // 0951..0954 7371 DEVANAGARI, // 0955..0963 7372 COMMON, // 0964..0965 7373 DEVANAGARI, // 0966..097F 7374 BENGALI, // 0980..0983 7375 UNKNOWN, // 0984 7376 BENGALI, // 0985..098C 7377 UNKNOWN, // 098D..098E 7378 BENGALI, // 098F..0990 7379 UNKNOWN, // 0991..0992 7380 BENGALI, // 0993..09A8 7381 UNKNOWN, // 09A9 7382 BENGALI, // 09AA..09B0 7383 UNKNOWN, // 09B1 7384 BENGALI, // 09B2 7385 UNKNOWN, // 09B3..09B5 7386 BENGALI, // 09B6..09B9 7387 UNKNOWN, // 09BA..09BB 7388 BENGALI, // 09BC..09C4 7389 UNKNOWN, // 09C5..09C6 7390 BENGALI, // 09C7..09C8 7391 UNKNOWN, // 09C9..09CA 7392 BENGALI, // 09CB..09CE 7393 UNKNOWN, // 09CF..09D6 7394 BENGALI, // 09D7 7395 UNKNOWN, // 09D8..09DB 7396 BENGALI, // 09DC..09DD 7397 UNKNOWN, // 09DE 7398 BENGALI, // 09DF..09E3 7399 UNKNOWN, // 09E4..09E5 7400 BENGALI, // 09E6..09FE 7401 UNKNOWN, // 09FF..0A00 7402 GURMUKHI, // 0A01..0A03 7403 UNKNOWN, // 0A04 7404 GURMUKHI, // 0A05..0A0A 7405 
UNKNOWN, // 0A0B..0A0E 7406 GURMUKHI, // 0A0F..0A10 7407 UNKNOWN, // 0A11..0A12 7408 GURMUKHI, // 0A13..0A28 7409 UNKNOWN, // 0A29 7410 GURMUKHI, // 0A2A..0A30 7411 UNKNOWN, // 0A31 7412 GURMUKHI, // 0A32..0A33 7413 UNKNOWN, // 0A34 7414 GURMUKHI, // 0A35..0A36 7415 UNKNOWN, // 0A37 7416 GURMUKHI, // 0A38..0A39 7417 UNKNOWN, // 0A3A..0A3B 7418 GURMUKHI, // 0A3C 7419 UNKNOWN, // 0A3D 7420 GURMUKHI, // 0A3E..0A42 7421 UNKNOWN, // 0A43..0A46 7422 GURMUKHI, // 0A47..0A48 7423 UNKNOWN, // 0A49..0A4A 7424 GURMUKHI, // 0A4B..0A4D 7425 UNKNOWN, // 0A4E..0A50 7426 GURMUKHI, // 0A51 7427 UNKNOWN, // 0A52..0A58 7428 GURMUKHI, // 0A59..0A5C 7429 UNKNOWN, // 0A5D 7430 GURMUKHI, // 0A5E 7431 UNKNOWN, // 0A5F..0A65 7432 GURMUKHI, // 0A66..0A76 7433 UNKNOWN, // 0A77..0A80 7434 GUJARATI, // 0A81..0A83 7435 UNKNOWN, // 0A84 7436 GUJARATI, // 0A85..0A8D 7437 UNKNOWN, // 0A8E 7438 GUJARATI, // 0A8F..0A91 7439 UNKNOWN, // 0A92 7440 GUJARATI, // 0A93..0AA8 7441 UNKNOWN, // 0AA9 7442 GUJARATI, // 0AAA..0AB0 7443 UNKNOWN, // 0AB1 7444 GUJARATI, // 0AB2..0AB3 7445 UNKNOWN, // 0AB4 7446 GUJARATI, // 0AB5..0AB9 7447 UNKNOWN, // 0ABA..0ABB 7448 GUJARATI, // 0ABC..0AC5 7449 UNKNOWN, // 0AC6 7450 GUJARATI, // 0AC7..0AC9 7451 UNKNOWN, // 0ACA 7452 GUJARATI, // 0ACB..0ACD 7453 UNKNOWN, // 0ACE..0ACF 7454 GUJARATI, // 0AD0 7455 UNKNOWN, // 0AD1..0ADF 7456 GUJARATI, // 0AE0..0AE3 7457 UNKNOWN, // 0AE4..0AE5 7458 GUJARATI, // 0AE6..0AF1 7459 UNKNOWN, // 0AF2..0AF8 7460 GUJARATI, // 0AF9..0AFF 7461 UNKNOWN, // 0B00 7462 ORIYA, // 0B01..0B03 7463 UNKNOWN, // 0B04 7464 ORIYA, // 0B05..0B0C 7465 UNKNOWN, // 0B0D..0B0E 7466 ORIYA, // 0B0F..0B10 7467 UNKNOWN, // 0B11..0B12 7468 ORIYA, // 0B13..0B28 7469 UNKNOWN, // 0B29 7470 ORIYA, // 0B2A..0B30 7471 UNKNOWN, // 0B31 7472 ORIYA, // 0B32..0B33 7473 UNKNOWN, // 0B34 7474 ORIYA, // 0B35..0B39 7475 UNKNOWN, // 0B3A..0B3B 7476 ORIYA, // 0B3C..0B44 7477 UNKNOWN, // 0B45..0B46 7478 ORIYA, // 0B47..0B48 7479 UNKNOWN, // 0B49..0B4A 7480 ORIYA, // 0B4B..0B4D 7481 
UNKNOWN, // 0B4E..0B54 7482 ORIYA, // 0B55..0B57 7483 UNKNOWN, // 0B58..0B5B 7484 ORIYA, // 0B5C..0B5D 7485 UNKNOWN, // 0B5E 7486 ORIYA, // 0B5F..0B63 7487 UNKNOWN, // 0B64..0B65 7488 ORIYA, // 0B66..0B77 7489 UNKNOWN, // 0B78..0B81 7490 TAMIL, // 0B82..0B83 7491 UNKNOWN, // 0B84 7492 TAMIL, // 0B85..0B8A 7493 UNKNOWN, // 0B8B..0B8D 7494 TAMIL, // 0B8E..0B90 7495 UNKNOWN, // 0B91 7496 TAMIL, // 0B92..0B95 7497 UNKNOWN, // 0B96..0B98 7498 TAMIL, // 0B99..0B9A 7499 UNKNOWN, // 0B9B 7500 TAMIL, // 0B9C 7501 UNKNOWN, // 0B9D 7502 TAMIL, // 0B9E..0B9F 7503 UNKNOWN, // 0BA0..0BA2 7504 TAMIL, // 0BA3..0BA4 7505 UNKNOWN, // 0BA5..0BA7 7506 TAMIL, // 0BA8..0BAA 7507 UNKNOWN, // 0BAB..0BAD 7508 TAMIL, // 0BAE..0BB9 7509 UNKNOWN, // 0BBA..0BBD 7510 TAMIL, // 0BBE..0BC2 7511 UNKNOWN, // 0BC3..0BC5 7512 TAMIL, // 0BC6..0BC8 7513 UNKNOWN, // 0BC9 7514 TAMIL, // 0BCA..0BCD 7515 UNKNOWN, // 0BCE..0BCF 7516 TAMIL, // 0BD0 7517 UNKNOWN, // 0BD1..0BD6 7518 TAMIL, // 0BD7 7519 UNKNOWN, // 0BD8..0BE5 7520 TAMIL, // 0BE6..0BFA 7521 UNKNOWN, // 0BFB..0BFF 7522 TELUGU, // 0C00..0C0C 7523 UNKNOWN, // 0C0D 7524 TELUGU, // 0C0E..0C10 7525 UNKNOWN, // 0C11 7526 TELUGU, // 0C12..0C28 7527 UNKNOWN, // 0C29 7528 TELUGU, // 0C2A..0C39 7529 UNKNOWN, // 0C3A..0C3B 7530 TELUGU, // 0C3C..0C44 7531 UNKNOWN, // 0C45 7532 TELUGU, // 0C46..0C48 7533 UNKNOWN, // 0C49 7534 TELUGU, // 0C4A..0C4D 7535 UNKNOWN, // 0C4E..0C54 7536 TELUGU, // 0C55..0C56 7537 UNKNOWN, // 0C57 7538 TELUGU, // 0C58..0C5A 7539 UNKNOWN, // 0C5B..0C5C 7540 TELUGU, // 0C5D 7541 UNKNOWN, // 0C5E..0C5F 7542 TELUGU, // 0C60..0C63 7543 UNKNOWN, // 0C64..0C65 7544 TELUGU, // 0C66..0C6F 7545 UNKNOWN, // 0C70..0C76 7546 TELUGU, // 0C77..0C7F 7547 KANNADA, // 0C80..0C8C 7548 UNKNOWN, // 0C8D 7549 KANNADA, // 0C8E..0C90 7550 UNKNOWN, // 0C91 7551 KANNADA, // 0C92..0CA8 7552 UNKNOWN, // 0CA9 7553 KANNADA, // 0CAA..0CB3 7554 UNKNOWN, // 0CB4 7555 KANNADA, // 0CB5..0CB9 7556 UNKNOWN, // 0CBA..0CBB 7557 KANNADA, // 0CBC..0CC4 7558 UNKNOWN, // 0CC5 
7559 KANNADA, // 0CC6..0CC8 7560 UNKNOWN, // 0CC9 7561 KANNADA, // 0CCA..0CCD 7562 UNKNOWN, // 0CCE..0CD4 7563 KANNADA, // 0CD5..0CD6 7564 UNKNOWN, // 0CD7..0CDC 7565 KANNADA, // 0CDD..0CDE 7566 UNKNOWN, // 0CDF 7567 KANNADA, // 0CE0..0CE3 7568 UNKNOWN, // 0CE4..0CE5 7569 KANNADA, // 0CE6..0CEF 7570 UNKNOWN, // 0CF0 7571 KANNADA, // 0CF1..0CF3 7572 UNKNOWN, // 0CF4..0CFF 7573 MALAYALAM, // 0D00..0D0C 7574 UNKNOWN, // 0D0D 7575 MALAYALAM, // 0D0E..0D10 7576 UNKNOWN, // 0D11 7577 MALAYALAM, // 0D12..0D44 7578 UNKNOWN, // 0D45 7579 MALAYALAM, // 0D46..0D48 7580 UNKNOWN, // 0D49 7581 MALAYALAM, // 0D4A..0D4F 7582 UNKNOWN, // 0D50..0D53 7583 MALAYALAM, // 0D54..0D63 7584 UNKNOWN, // 0D64..0D65 7585 MALAYALAM, // 0D66..0D7F 7586 UNKNOWN, // 0D80 7587 SINHALA, // 0D81..0D83 7588 UNKNOWN, // 0D84 7589 SINHALA, // 0D85..0D96 7590 UNKNOWN, // 0D97..0D99 7591 SINHALA, // 0D9A..0DB1 7592 UNKNOWN, // 0DB2 7593 SINHALA, // 0DB3..0DBB 7594 UNKNOWN, // 0DBC 7595 SINHALA, // 0DBD 7596 UNKNOWN, // 0DBE..0DBF 7597 SINHALA, // 0DC0..0DC6 7598 UNKNOWN, // 0DC7..0DC9 7599 SINHALA, // 0DCA 7600 UNKNOWN, // 0DCB..0DCE 7601 SINHALA, // 0DCF..0DD4 7602 UNKNOWN, // 0DD5 7603 SINHALA, // 0DD6 7604 UNKNOWN, // 0DD7 7605 SINHALA, // 0DD8..0DDF 7606 UNKNOWN, // 0DE0..0DE5 7607 SINHALA, // 0DE6..0DEF 7608 UNKNOWN, // 0DF0..0DF1 7609 SINHALA, // 0DF2..0DF4 7610 UNKNOWN, // 0DF5..0E00 7611 THAI, // 0E01..0E3A 7612 UNKNOWN, // 0E3B..0E3E 7613 COMMON, // 0E3F 7614 THAI, // 0E40..0E5B 7615 UNKNOWN, // 0E5C..0E80 7616 LAO, // 0E81..0E82 7617 UNKNOWN, // 0E83 7618 LAO, // 0E84 7619 UNKNOWN, // 0E85 7620 LAO, // 0E86..0E8A 7621 UNKNOWN, // 0E8B 7622 LAO, // 0E8C..0EA3 7623 UNKNOWN, // 0EA4 7624 LAO, // 0EA5 7625 UNKNOWN, // 0EA6 7626 LAO, // 0EA7..0EBD 7627 UNKNOWN, // 0EBE..0EBF 7628 LAO, // 0EC0..0EC4 7629 UNKNOWN, // 0EC5 7630 LAO, // 0EC6 7631 UNKNOWN, // 0EC7 7632 LAO, // 0EC8..0ECE 7633 UNKNOWN, // 0ECF 7634 LAO, // 0ED0..0ED9 7635 UNKNOWN, // 0EDA..0EDB 7636 LAO, // 0EDC..0EDF 7637 UNKNOWN, // 
0EE0..0EFF 7638 TIBETAN, // 0F00..0F47 7639 UNKNOWN, // 0F48 7640 TIBETAN, // 0F49..0F6C 7641 UNKNOWN, // 0F6D..0F70 7642 TIBETAN, // 0F71..0F97 7643 UNKNOWN, // 0F98 7644 TIBETAN, // 0F99..0FBC 7645 UNKNOWN, // 0FBD 7646 TIBETAN, // 0FBE..0FCC 7647 UNKNOWN, // 0FCD 7648 TIBETAN, // 0FCE..0FD4 7649 COMMON, // 0FD5..0FD8 7650 TIBETAN, // 0FD9..0FDA 7651 UNKNOWN, // 0FDB..0FFF 7652 MYANMAR, // 1000..109F 7653 GEORGIAN, // 10A0..10C5 7654 UNKNOWN, // 10C6 7655 GEORGIAN, // 10C7 7656 UNKNOWN, // 10C8..10CC 7657 GEORGIAN, // 10CD 7658 UNKNOWN, // 10CE..10CF 7659 GEORGIAN, // 10D0..10FA 7660 COMMON, // 10FB 7661 GEORGIAN, // 10FC..10FF 7662 HANGUL, // 1100..11FF 7663 ETHIOPIC, // 1200..1248 7664 UNKNOWN, // 1249 7665 ETHIOPIC, // 124A..124D 7666 UNKNOWN, // 124E..124F 7667 ETHIOPIC, // 1250..1256 7668 UNKNOWN, // 1257 7669 ETHIOPIC, // 1258 7670 UNKNOWN, // 1259 7671 ETHIOPIC, // 125A..125D 7672 UNKNOWN, // 125E..125F 7673 ETHIOPIC, // 1260..1288 7674 UNKNOWN, // 1289 7675 ETHIOPIC, // 128A..128D 7676 UNKNOWN, // 128E..128F 7677 ETHIOPIC, // 1290..12B0 7678 UNKNOWN, // 12B1 7679 ETHIOPIC, // 12B2..12B5 7680 UNKNOWN, // 12B6..12B7 7681 ETHIOPIC, // 12B8..12BE 7682 UNKNOWN, // 12BF 7683 ETHIOPIC, // 12C0 7684 UNKNOWN, // 12C1 7685 ETHIOPIC, // 12C2..12C5 7686 UNKNOWN, // 12C6..12C7 7687 ETHIOPIC, // 12C8..12D6 7688 UNKNOWN, // 12D7 7689 ETHIOPIC, // 12D8..1310 7690 UNKNOWN, // 1311 7691 ETHIOPIC, // 1312..1315 7692 UNKNOWN, // 1316..1317 7693 ETHIOPIC, // 1318..135A 7694 UNKNOWN, // 135B..135C 7695 ETHIOPIC, // 135D..137C 7696 UNKNOWN, // 137D..137F 7697 ETHIOPIC, // 1380..1399 7698 UNKNOWN, // 139A..139F 7699 CHEROKEE, // 13A0..13F5 7700 UNKNOWN, // 13F6..13F7 7701 CHEROKEE, // 13F8..13FD 7702 UNKNOWN, // 13FE..13FF 7703 CANADIAN_ABORIGINAL, // 1400..167F 7704 OGHAM, // 1680..169C 7705 UNKNOWN, // 169D..169F 7706 RUNIC, // 16A0..16EA 7707 COMMON, // 16EB..16ED 7708 RUNIC, // 16EE..16F8 7709 UNKNOWN, // 16F9..16FF 7710 TAGALOG, // 1700..1715 7711 UNKNOWN, // 1716..171E 
7712 TAGALOG, // 171F 7713 HANUNOO, // 1720..1734 7714 COMMON, // 1735..1736 7715 UNKNOWN, // 1737..173F 7716 BUHID, // 1740..1753 7717 UNKNOWN, // 1754..175F 7718 TAGBANWA, // 1760..176C 7719 UNKNOWN, // 176D 7720 TAGBANWA, // 176E..1770 7721 UNKNOWN, // 1771 7722 TAGBANWA, // 1772..1773 7723 UNKNOWN, // 1774..177F 7724 KHMER, // 1780..17DD 7725 UNKNOWN, // 17DE..17DF 7726 KHMER, // 17E0..17E9 7727 UNKNOWN, // 17EA..17EF 7728 KHMER, // 17F0..17F9 7729 UNKNOWN, // 17FA..17FF 7730 MONGOLIAN, // 1800..1801 7731 COMMON, // 1802..1803 7732 MONGOLIAN, // 1804 7733 COMMON, // 1805 7734 MONGOLIAN, // 1806..1819 7735 UNKNOWN, // 181A..181F 7736 MONGOLIAN, // 1820..1878 7737 UNKNOWN, // 1879..187F 7738 MONGOLIAN, // 1880..18AA 7739 UNKNOWN, // 18AB..18AF 7740 CANADIAN_ABORIGINAL, // 18B0..18F5 7741 UNKNOWN, // 18F6..18FF 7742 LIMBU, // 1900..191E 7743 UNKNOWN, // 191F 7744 LIMBU, // 1920..192B 7745 UNKNOWN, // 192C..192F 7746 LIMBU, // 1930..193B 7747 UNKNOWN, // 193C..193F 7748 LIMBU, // 1940 7749 UNKNOWN, // 1941..1943 7750 LIMBU, // 1944..194F 7751 TAI_LE, // 1950..196D 7752 UNKNOWN, // 196E..196F 7753 TAI_LE, // 1970..1974 7754 UNKNOWN, // 1975..197F 7755 NEW_TAI_LUE, // 1980..19AB 7756 UNKNOWN, // 19AC..19AF 7757 NEW_TAI_LUE, // 19B0..19C9 7758 UNKNOWN, // 19CA..19CF 7759 NEW_TAI_LUE, // 19D0..19DA 7760 UNKNOWN, // 19DB..19DD 7761 NEW_TAI_LUE, // 19DE..19DF 7762 KHMER, // 19E0..19FF 7763 BUGINESE, // 1A00..1A1B 7764 UNKNOWN, // 1A1C..1A1D 7765 BUGINESE, // 1A1E..1A1F 7766 TAI_THAM, // 1A20..1A5E 7767 UNKNOWN, // 1A5F 7768 TAI_THAM, // 1A60..1A7C 7769 UNKNOWN, // 1A7D..1A7E 7770 TAI_THAM, // 1A7F..1A89 7771 UNKNOWN, // 1A8A..1A8F 7772 TAI_THAM, // 1A90..1A99 7773 UNKNOWN, // 1A9A..1A9F 7774 TAI_THAM, // 1AA0..1AAD 7775 UNKNOWN, // 1AAE..1AAF 7776 INHERITED, // 1AB0..1ACE 7777 UNKNOWN, // 1ACF..1AFF 7778 BALINESE, // 1B00..1B4C 7779 UNKNOWN, // 1B4D 7780 BALINESE, // 1B4E..1B7F 7781 SUNDANESE, // 1B80..1BBF 7782 BATAK, // 1BC0..1BF3 7783 UNKNOWN, // 1BF4..1BFB 7784 
BATAK, // 1BFC..1BFF 7785 LEPCHA, // 1C00..1C37 7786 UNKNOWN, // 1C38..1C3A 7787 LEPCHA, // 1C3B..1C49 7788 UNKNOWN, // 1C4A..1C4C 7789 LEPCHA, // 1C4D..1C4F 7790 OL_CHIKI, // 1C50..1C7F 7791 CYRILLIC, // 1C80..1C8A 7792 UNKNOWN, // 1C8B..1C8F 7793 GEORGIAN, // 1C90..1CBA 7794 UNKNOWN, // 1CBB..1CBC 7795 GEORGIAN, // 1CBD..1CBF 7796 SUNDANESE, // 1CC0..1CC7 7797 UNKNOWN, // 1CC8..1CCF 7798 INHERITED, // 1CD0..1CD2 7799 COMMON, // 1CD3 7800 INHERITED, // 1CD4..1CE0 7801 COMMON, // 1CE1 7802 INHERITED, // 1CE2..1CE8 7803 COMMON, // 1CE9..1CEC 7804 INHERITED, // 1CED 7805 COMMON, // 1CEE..1CF3 7806 INHERITED, // 1CF4 7807 COMMON, // 1CF5..1CF7 7808 INHERITED, // 1CF8..1CF9 7809 COMMON, // 1CFA 7810 UNKNOWN, // 1CFB..1CFF 7811 LATIN, // 1D00..1D25 7812 GREEK, // 1D26..1D2A 7813 CYRILLIC, // 1D2B 7814 LATIN, // 1D2C..1D5C 7815 GREEK, // 1D5D..1D61 7816 LATIN, // 1D62..1D65 7817 GREEK, // 1D66..1D6A 7818 LATIN, // 1D6B..1D77 7819 CYRILLIC, // 1D78 7820 LATIN, // 1D79..1DBE 7821 GREEK, // 1DBF 7822 INHERITED, // 1DC0..1DFF 7823 LATIN, // 1E00..1EFF 7824 GREEK, // 1F00..1F15 7825 UNKNOWN, // 1F16..1F17 7826 GREEK, // 1F18..1F1D 7827 UNKNOWN, // 1F1E..1F1F 7828 GREEK, // 1F20..1F45 7829 UNKNOWN, // 1F46..1F47 7830 GREEK, // 1F48..1F4D 7831 UNKNOWN, // 1F4E..1F4F 7832 GREEK, // 1F50..1F57 7833 UNKNOWN, // 1F58 7834 GREEK, // 1F59 7835 UNKNOWN, // 1F5A 7836 GREEK, // 1F5B 7837 UNKNOWN, // 1F5C 7838 GREEK, // 1F5D 7839 UNKNOWN, // 1F5E 7840 GREEK, // 1F5F..1F7D 7841 UNKNOWN, // 1F7E..1F7F 7842 GREEK, // 1F80..1FB4 7843 UNKNOWN, // 1FB5 7844 GREEK, // 1FB6..1FC4 7845 UNKNOWN, // 1FC5 7846 GREEK, // 1FC6..1FD3 7847 UNKNOWN, // 1FD4..1FD5 7848 GREEK, // 1FD6..1FDB 7849 UNKNOWN, // 1FDC 7850 GREEK, // 1FDD..1FEF 7851 UNKNOWN, // 1FF0..1FF1 7852 GREEK, // 1FF2..1FF4 7853 UNKNOWN, // 1FF5 7854 GREEK, // 1FF6..1FFE 7855 UNKNOWN, // 1FFF 7856 COMMON, // 2000..200B 7857 INHERITED, // 200C..200D 7858 COMMON, // 200E..2064 7859 UNKNOWN, // 2065 7860 COMMON, // 2066..2070 7861 LATIN, // 
2071 7862 UNKNOWN, // 2072..2073 7863 COMMON, // 2074..207E 7864 LATIN, // 207F 7865 COMMON, // 2080..208E 7866 UNKNOWN, // 208F 7867 LATIN, // 2090..209C 7868 UNKNOWN, // 209D..209F 7869 COMMON, // 20A0..20C0 7870 UNKNOWN, // 20C1..20CF 7871 INHERITED, // 20D0..20F0 7872 UNKNOWN, // 20F1..20FF 7873 COMMON, // 2100..2125 7874 GREEK, // 2126 7875 COMMON, // 2127..2129 7876 LATIN, // 212A..212B 7877 COMMON, // 212C..2131 7878 LATIN, // 2132 7879 COMMON, // 2133..214D 7880 LATIN, // 214E 7881 COMMON, // 214F..215F 7882 LATIN, // 2160..2188 7883 COMMON, // 2189..218B 7884 UNKNOWN, // 218C..218F 7885 COMMON, // 2190..2429 7886 UNKNOWN, // 242A..243F 7887 COMMON, // 2440..244A 7888 UNKNOWN, // 244B..245F 7889 COMMON, // 2460..27FF 7890 BRAILLE, // 2800..28FF 7891 COMMON, // 2900..2B73 7892 UNKNOWN, // 2B74..2B75 7893 COMMON, // 2B76..2B95 7894 UNKNOWN, // 2B96 7895 COMMON, // 2B97..2BFF 7896 GLAGOLITIC, // 2C00..2C5F 7897 LATIN, // 2C60..2C7F 7898 COPTIC, // 2C80..2CF3 7899 UNKNOWN, // 2CF4..2CF8 7900 COPTIC, // 2CF9..2CFF 7901 GEORGIAN, // 2D00..2D25 7902 UNKNOWN, // 2D26 7903 GEORGIAN, // 2D27 7904 UNKNOWN, // 2D28..2D2C 7905 GEORGIAN, // 2D2D 7906 UNKNOWN, // 2D2E..2D2F 7907 TIFINAGH, // 2D30..2D67 7908 UNKNOWN, // 2D68..2D6E 7909 TIFINAGH, // 2D6F..2D70 7910 UNKNOWN, // 2D71..2D7E 7911 TIFINAGH, // 2D7F 7912 ETHIOPIC, // 2D80..2D96 7913 UNKNOWN, // 2D97..2D9F 7914 ETHIOPIC, // 2DA0..2DA6 7915 UNKNOWN, // 2DA7 7916 ETHIOPIC, // 2DA8..2DAE 7917 UNKNOWN, // 2DAF 7918 ETHIOPIC, // 2DB0..2DB6 7919 UNKNOWN, // 2DB7 7920 ETHIOPIC, // 2DB8..2DBE 7921 UNKNOWN, // 2DBF 7922 ETHIOPIC, // 2DC0..2DC6 7923 UNKNOWN, // 2DC7 7924 ETHIOPIC, // 2DC8..2DCE 7925 UNKNOWN, // 2DCF 7926 ETHIOPIC, // 2DD0..2DD6 7927 UNKNOWN, // 2DD7 7928 ETHIOPIC, // 2DD8..2DDE 7929 UNKNOWN, // 2DDF 7930 CYRILLIC, // 2DE0..2DFF 7931 COMMON, // 2E00..2E5D 7932 UNKNOWN, // 2E5E..2E7F 7933 HAN, // 2E80..2E99 7934 UNKNOWN, // 2E9A 7935 HAN, // 2E9B..2EF3 7936 UNKNOWN, // 2EF4..2EFF 7937 HAN, // 2F00..2FD5 7938 
UNKNOWN, // 2FD6..2FEF 7939 COMMON, // 2FF0..3004 7940 HAN, // 3005 7941 COMMON, // 3006 7942 HAN, // 3007 7943 COMMON, // 3008..3020 7944 HAN, // 3021..3029 7945 INHERITED, // 302A..302D 7946 HANGUL, // 302E..302F 7947 COMMON, // 3030..3037 7948 HAN, // 3038..303B 7949 COMMON, // 303C..303F 7950 UNKNOWN, // 3040 7951 HIRAGANA, // 3041..3096 7952 UNKNOWN, // 3097..3098 7953 INHERITED, // 3099..309A 7954 COMMON, // 309B..309C 7955 HIRAGANA, // 309D..309F 7956 COMMON, // 30A0 7957 KATAKANA, // 30A1..30FA 7958 COMMON, // 30FB..30FC 7959 KATAKANA, // 30FD..30FF 7960 UNKNOWN, // 3100..3104 7961 BOPOMOFO, // 3105..312F 7962 UNKNOWN, // 3130 7963 HANGUL, // 3131..318E 7964 UNKNOWN, // 318F 7965 COMMON, // 3190..319F 7966 BOPOMOFO, // 31A0..31BF 7967 COMMON, // 31C0..31E5 7968 UNKNOWN, // 31E6..31EE 7969 COMMON, // 31EF 7970 KATAKANA, // 31F0..31FF 7971 HANGUL, // 3200..321E 7972 UNKNOWN, // 321F 7973 COMMON, // 3220..325F 7974 HANGUL, // 3260..327E 7975 COMMON, // 327F..32CF 7976 KATAKANA, // 32D0..32FE 7977 COMMON, // 32FF 7978 KATAKANA, // 3300..3357 7979 COMMON, // 3358..33FF 7980 HAN, // 3400..4DBF 7981 COMMON, // 4DC0..4DFF 7982 HAN, // 4E00..9FFF 7983 YI, // A000..A48C 7984 UNKNOWN, // A48D..A48F 7985 YI, // A490..A4C6 7986 UNKNOWN, // A4C7..A4CF 7987 LISU, // A4D0..A4FF 7988 VAI, // A500..A62B 7989 UNKNOWN, // A62C..A63F 7990 CYRILLIC, // A640..A69F 7991 BAMUM, // A6A0..A6F7 7992 UNKNOWN, // A6F8..A6FF 7993 COMMON, // A700..A721 7994 LATIN, // A722..A787 7995 COMMON, // A788..A78A 7996 LATIN, // A78B..A7CD 7997 UNKNOWN, // A7CE..A7CF 7998 LATIN, // A7D0..A7D1 7999 UNKNOWN, // A7D2 8000 LATIN, // A7D3 8001 UNKNOWN, // A7D4 8002 LATIN, // A7D5..A7DC 8003 UNKNOWN, // A7DD..A7F1 8004 LATIN, // A7F2..A7FF 8005 SYLOTI_NAGRI, // A800..A82C 8006 UNKNOWN, // A82D..A82F 8007 COMMON, // A830..A839 8008 UNKNOWN, // A83A..A83F 8009 PHAGS_PA, // A840..A877 8010 UNKNOWN, // A878..A87F 8011 SAURASHTRA, // A880..A8C5 8012 UNKNOWN, // A8C6..A8CD 8013 SAURASHTRA, // A8CE..A8D9 8014 
UNKNOWN, // A8DA..A8DF 8015 DEVANAGARI, // A8E0..A8FF 8016 KAYAH_LI, // A900..A92D 8017 COMMON, // A92E 8018 KAYAH_LI, // A92F 8019 REJANG, // A930..A953 8020 UNKNOWN, // A954..A95E 8021 REJANG, // A95F 8022 HANGUL, // A960..A97C 8023 UNKNOWN, // A97D..A97F 8024 JAVANESE, // A980..A9CD 8025 UNKNOWN, // A9CE 8026 COMMON, // A9CF 8027 JAVANESE, // A9D0..A9D9 8028 UNKNOWN, // A9DA..A9DD 8029 JAVANESE, // A9DE..A9DF 8030 MYANMAR, // A9E0..A9FE 8031 UNKNOWN, // A9FF 8032 CHAM, // AA00..AA36 8033 UNKNOWN, // AA37..AA3F 8034 CHAM, // AA40..AA4D 8035 UNKNOWN, // AA4E..AA4F 8036 CHAM, // AA50..AA59 8037 UNKNOWN, // AA5A..AA5B 8038 CHAM, // AA5C..AA5F 8039 MYANMAR, // AA60..AA7F 8040 TAI_VIET, // AA80..AAC2 8041 UNKNOWN, // AAC3..AADA 8042 TAI_VIET, // AADB..AADF 8043 MEETEI_MAYEK, // AAE0..AAF6 8044 UNKNOWN, // AAF7..AB00 8045 ETHIOPIC, // AB01..AB06 8046 UNKNOWN, // AB07..AB08 8047 ETHIOPIC, // AB09..AB0E 8048 UNKNOWN, // AB0F..AB10 8049 ETHIOPIC, // AB11..AB16 8050 UNKNOWN, // AB17..AB1F 8051 ETHIOPIC, // AB20..AB26 8052 UNKNOWN, // AB27 8053 ETHIOPIC, // AB28..AB2E 8054 UNKNOWN, // AB2F 8055 LATIN, // AB30..AB5A 8056 COMMON, // AB5B 8057 LATIN, // AB5C..AB64 8058 GREEK, // AB65 8059 LATIN, // AB66..AB69 8060 COMMON, // AB6A..AB6B 8061 UNKNOWN, // AB6C..AB6F 8062 CHEROKEE, // AB70..ABBF 8063 MEETEI_MAYEK, // ABC0..ABED 8064 UNKNOWN, // ABEE..ABEF 8065 MEETEI_MAYEK, // ABF0..ABF9 8066 UNKNOWN, // ABFA..ABFF 8067 HANGUL, // AC00..D7A3 8068 UNKNOWN, // D7A4..D7AF 8069 HANGUL, // D7B0..D7C6 8070 UNKNOWN, // D7C7..D7CA 8071 HANGUL, // D7CB..D7FB 8072 UNKNOWN, // D7FC..F8FF 8073 HAN, // F900..FA6D 8074 UNKNOWN, // FA6E..FA6F 8075 HAN, // FA70..FAD9 8076 UNKNOWN, // FADA..FAFF 8077 LATIN, // FB00..FB06 8078 UNKNOWN, // FB07..FB12 8079 ARMENIAN, // FB13..FB17 8080 UNKNOWN, // FB18..FB1C 8081 HEBREW, // FB1D..FB36 8082 UNKNOWN, // FB37 8083 HEBREW, // FB38..FB3C 8084 UNKNOWN, // FB3D 8085 HEBREW, // FB3E 8086 UNKNOWN, // FB3F 8087 HEBREW, // FB40..FB41 8088 UNKNOWN, // FB42 8089 
HEBREW, // FB43..FB44 8090 UNKNOWN, // FB45 8091 HEBREW, // FB46..FB4F 8092 ARABIC, // FB50..FBC2 8093 UNKNOWN, // FBC3..FBD2 8094 ARABIC, // FBD3..FD3D 8095 COMMON, // FD3E..FD3F 8096 ARABIC, // FD40..FD8F 8097 UNKNOWN, // FD90..FD91 8098 ARABIC, // FD92..FDC7 8099 UNKNOWN, // FDC8..FDCE 8100 ARABIC, // FDCF 8101 UNKNOWN, // FDD0..FDEF 8102 ARABIC, // FDF0..FDFF 8103 INHERITED, // FE00..FE0F 8104 COMMON, // FE10..FE19 8105 UNKNOWN, // FE1A..FE1F 8106 INHERITED, // FE20..FE2D 8107 CYRILLIC, // FE2E..FE2F 8108 COMMON, // FE30..FE52 8109 UNKNOWN, // FE53 8110 COMMON, // FE54..FE66 8111 UNKNOWN, // FE67 8112 COMMON, // FE68..FE6B 8113 UNKNOWN, // FE6C..FE6F 8114 ARABIC, // FE70..FE74 8115 UNKNOWN, // FE75 8116 ARABIC, // FE76..FEFC 8117 UNKNOWN, // FEFD..FEFE 8118 COMMON, // FEFF 8119 UNKNOWN, // FF00 8120 COMMON, // FF01..FF20 8121 LATIN, // FF21..FF3A 8122 COMMON, // FF3B..FF40 8123 LATIN, // FF41..FF5A 8124 COMMON, // FF5B..FF65 8125 KATAKANA, // FF66..FF6F 8126 COMMON, // FF70 8127 KATAKANA, // FF71..FF9D 8128 COMMON, // FF9E..FF9F 8129 HANGUL, // FFA0..FFBE 8130 UNKNOWN, // FFBF..FFC1 8131 HANGUL, // FFC2..FFC7 8132 UNKNOWN, // FFC8..FFC9 8133 HANGUL, // FFCA..FFCF 8134 UNKNOWN, // FFD0..FFD1 8135 HANGUL, // FFD2..FFD7 8136 UNKNOWN, // FFD8..FFD9 8137 HANGUL, // FFDA..FFDC 8138 UNKNOWN, // FFDD..FFDF 8139 COMMON, // FFE0..FFE6 8140 UNKNOWN, // FFE7 8141 COMMON, // FFE8..FFEE 8142 UNKNOWN, // FFEF..FFF8 8143 COMMON, // FFF9..FFFD 8144 UNKNOWN, // FFFE..FFFF 8145 LINEAR_B, // 10000..1000B 8146 UNKNOWN, // 1000C 8147 LINEAR_B, // 1000D..10026 8148 UNKNOWN, // 10027 8149 LINEAR_B, // 10028..1003A 8150 UNKNOWN, // 1003B 8151 LINEAR_B, // 1003C..1003D 8152 UNKNOWN, // 1003E 8153 LINEAR_B, // 1003F..1004D 8154 UNKNOWN, // 1004E..1004F 8155 LINEAR_B, // 10050..1005D 8156 UNKNOWN, // 1005E..1007F 8157 LINEAR_B, // 10080..100FA 8158 UNKNOWN, // 100FB..100FF 8159 COMMON, // 10100..10102 8160 UNKNOWN, // 10103..10106 8161 COMMON, // 10107..10133 8162 UNKNOWN, // 10134..10136 
8163 COMMON, // 10137..1013F 8164 GREEK, // 10140..1018E 8165 UNKNOWN, // 1018F 8166 COMMON, // 10190..1019C 8167 UNKNOWN, // 1019D..1019F 8168 GREEK, // 101A0 8169 UNKNOWN, // 101A1..101CF 8170 COMMON, // 101D0..101FC 8171 INHERITED, // 101FD 8172 UNKNOWN, // 101FE..1027F 8173 LYCIAN, // 10280..1029C 8174 UNKNOWN, // 1029D..1029F 8175 CARIAN, // 102A0..102D0 8176 UNKNOWN, // 102D1..102DF 8177 INHERITED, // 102E0 8178 COMMON, // 102E1..102FB 8179 UNKNOWN, // 102FC..102FF 8180 OLD_ITALIC, // 10300..10323 8181 UNKNOWN, // 10324..1032C 8182 OLD_ITALIC, // 1032D..1032F 8183 GOTHIC, // 10330..1034A 8184 UNKNOWN, // 1034B..1034F 8185 OLD_PERMIC, // 10350..1037A 8186 UNKNOWN, // 1037B..1037F 8187 UGARITIC, // 10380..1039D 8188 UNKNOWN, // 1039E 8189 UGARITIC, // 1039F 8190 OLD_PERSIAN, // 103A0..103C3 8191 UNKNOWN, // 103C4..103C7 8192 OLD_PERSIAN, // 103C8..103D5 8193 UNKNOWN, // 103D6..103FF 8194 DESERET, // 10400..1044F 8195 SHAVIAN, // 10450..1047F 8196 OSMANYA, // 10480..1049D 8197 UNKNOWN, // 1049E..1049F 8198 OSMANYA, // 104A0..104A9 8199 UNKNOWN, // 104AA..104AF 8200 OSAGE, // 104B0..104D3 8201 UNKNOWN, // 104D4..104D7 8202 OSAGE, // 104D8..104FB 8203 UNKNOWN, // 104FC..104FF 8204 ELBASAN, // 10500..10527 8205 UNKNOWN, // 10528..1052F 8206 CAUCASIAN_ALBANIAN, // 10530..10563 8207 UNKNOWN, // 10564..1056E 8208 CAUCASIAN_ALBANIAN, // 1056F 8209 VITHKUQI, // 10570..1057A 8210 UNKNOWN, // 1057B 8211 VITHKUQI, // 1057C..1058A 8212 UNKNOWN, // 1058B 8213 VITHKUQI, // 1058C..10592 8214 UNKNOWN, // 10593 8215 VITHKUQI, // 10594..10595 8216 UNKNOWN, // 10596 8217 VITHKUQI, // 10597..105A1 8218 UNKNOWN, // 105A2 8219 VITHKUQI, // 105A3..105B1 8220 UNKNOWN, // 105B2 8221 VITHKUQI, // 105B3..105B9 8222 UNKNOWN, // 105BA 8223 VITHKUQI, // 105BB..105BC 8224 UNKNOWN, // 105BD..105BF 8225 TODHRI, // 105C0..105F3 8226 UNKNOWN, // 105F4..105FF 8227 LINEAR_A, // 10600..10736 8228 UNKNOWN, // 10737..1073F 8229 LINEAR_A, // 10740..10755 8230 UNKNOWN, // 10756..1075F 8231 LINEAR_A, // 
10760..10767 8232 UNKNOWN, // 10768..1077F 8233 LATIN, // 10780..10785 8234 UNKNOWN, // 10786 8235 LATIN, // 10787..107B0 8236 UNKNOWN, // 107B1 8237 LATIN, // 107B2..107BA 8238 UNKNOWN, // 107BB..107FF 8239 CYPRIOT, // 10800..10805 8240 UNKNOWN, // 10806..10807 8241 CYPRIOT, // 10808 8242 UNKNOWN, // 10809 8243 CYPRIOT, // 1080A..10835 8244 UNKNOWN, // 10836 8245 CYPRIOT, // 10837..10838 8246 UNKNOWN, // 10839..1083B 8247 CYPRIOT, // 1083C 8248 UNKNOWN, // 1083D..1083E 8249 CYPRIOT, // 1083F 8250 IMPERIAL_ARAMAIC, // 10840..10855 8251 UNKNOWN, // 10856 8252 IMPERIAL_ARAMAIC, // 10857..1085F 8253 PALMYRENE, // 10860..1087F 8254 NABATAEAN, // 10880..1089E 8255 UNKNOWN, // 1089F..108A6 8256 NABATAEAN, // 108A7..108AF 8257 UNKNOWN, // 108B0..108DF 8258 HATRAN, // 108E0..108F2 8259 UNKNOWN, // 108F3 8260 HATRAN, // 108F4..108F5 8261 UNKNOWN, // 108F6..108FA 8262 HATRAN, // 108FB..108FF 8263 PHOENICIAN, // 10900..1091B 8264 UNKNOWN, // 1091C..1091E 8265 PHOENICIAN, // 1091F 8266 LYDIAN, // 10920..10939 8267 UNKNOWN, // 1093A..1093E 8268 LYDIAN, // 1093F 8269 UNKNOWN, // 10940..1097F 8270 MEROITIC_HIEROGLYPHS, // 10980..1099F 8271 MEROITIC_CURSIVE, // 109A0..109B7 8272 UNKNOWN, // 109B8..109BB 8273 MEROITIC_CURSIVE, // 109BC..109CF 8274 UNKNOWN, // 109D0..109D1 8275 MEROITIC_CURSIVE, // 109D2..109FF 8276 KHAROSHTHI, // 10A00..10A03 8277 UNKNOWN, // 10A04 8278 KHAROSHTHI, // 10A05..10A06 8279 UNKNOWN, // 10A07..10A0B 8280 KHAROSHTHI, // 10A0C..10A13 8281 UNKNOWN, // 10A14 8282 KHAROSHTHI, // 10A15..10A17 8283 UNKNOWN, // 10A18 8284 KHAROSHTHI, // 10A19..10A35 8285 UNKNOWN, // 10A36..10A37 8286 KHAROSHTHI, // 10A38..10A3A 8287 UNKNOWN, // 10A3B..10A3E 8288 KHAROSHTHI, // 10A3F..10A48 8289 UNKNOWN, // 10A49..10A4F 8290 KHAROSHTHI, // 10A50..10A58 8291 UNKNOWN, // 10A59..10A5F 8292 OLD_SOUTH_ARABIAN, // 10A60..10A7F 8293 OLD_NORTH_ARABIAN, // 10A80..10A9F 8294 UNKNOWN, // 10AA0..10ABF 8295 MANICHAEAN, // 10AC0..10AE6 8296 UNKNOWN, // 10AE7..10AEA 8297 MANICHAEAN, // 
10AEB..10AF6 8298 UNKNOWN, // 10AF7..10AFF 8299 AVESTAN, // 10B00..10B35 8300 UNKNOWN, // 10B36..10B38 8301 AVESTAN, // 10B39..10B3F 8302 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55 8303 UNKNOWN, // 10B56..10B57 8304 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F 8305 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72 8306 UNKNOWN, // 10B73..10B77 8307 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F 8308 PSALTER_PAHLAVI, // 10B80..10B91 8309 UNKNOWN, // 10B92..10B98 8310 PSALTER_PAHLAVI, // 10B99..10B9C 8311 UNKNOWN, // 10B9D..10BA8 8312 PSALTER_PAHLAVI, // 10BA9..10BAF 8313 UNKNOWN, // 10BB0..10BFF 8314 OLD_TURKIC, // 10C00..10C48 8315 UNKNOWN, // 10C49..10C7F 8316 OLD_HUNGARIAN, // 10C80..10CB2 8317 UNKNOWN, // 10CB3..10CBF 8318 OLD_HUNGARIAN, // 10CC0..10CF2 8319 UNKNOWN, // 10CF3..10CF9 8320 OLD_HUNGARIAN, // 10CFA..10CFF 8321 HANIFI_ROHINGYA, // 10D00..10D27 8322 UNKNOWN, // 10D28..10D2F 8323 HANIFI_ROHINGYA, // 10D30..10D39 8324 UNKNOWN, // 10D3A..10D3F 8325 GARAY, // 10D40..10D65 8326 UNKNOWN, // 10D66..10D68 8327 GARAY, // 10D69..10D85 8328 UNKNOWN, // 10D86..10D8D 8329 GARAY, // 10D8E..10D8F 8330 UNKNOWN, // 10D90..10E5F 8331 ARABIC, // 10E60..10E7E 8332 UNKNOWN, // 10E7F 8333 YEZIDI, // 10E80..10EA9 8334 UNKNOWN, // 10EAA 8335 YEZIDI, // 10EAB..10EAD 8336 UNKNOWN, // 10EAE..10EAF 8337 YEZIDI, // 10EB0..10EB1 8338 UNKNOWN, // 10EB2..10EC1 8339 ARABIC, // 10EC2..10EC4 8340 UNKNOWN, // 10EC5..10EFB 8341 ARABIC, // 10EFC..10EFF 8342 OLD_SOGDIAN, // 10F00..10F27 8343 UNKNOWN, // 10F28..10F2F 8344 SOGDIAN, // 10F30..10F59 8345 UNKNOWN, // 10F5A..10F6F 8346 OLD_UYGHUR, // 10F70..10F89 8347 UNKNOWN, // 10F8A..10FAF 8348 CHORASMIAN, // 10FB0..10FCB 8349 UNKNOWN, // 10FCC..10FDF 8350 ELYMAIC, // 10FE0..10FF6 8351 UNKNOWN, // 10FF7..10FFF 8352 BRAHMI, // 11000..1104D 8353 UNKNOWN, // 1104E..11051 8354 BRAHMI, // 11052..11075 8355 UNKNOWN, // 11076..1107E 8356 BRAHMI, // 1107F 8357 KAITHI, // 11080..110C2 8358 UNKNOWN, // 110C3..110CC 8359 KAITHI, // 110CD 8360 UNKNOWN, // 110CE..110CF 8361 
SORA_SOMPENG, // 110D0..110E8 8362 UNKNOWN, // 110E9..110EF 8363 SORA_SOMPENG, // 110F0..110F9 8364 UNKNOWN, // 110FA..110FF 8365 CHAKMA, // 11100..11134 8366 UNKNOWN, // 11135 8367 CHAKMA, // 11136..11147 8368 UNKNOWN, // 11148..1114F 8369 MAHAJANI, // 11150..11176 8370 UNKNOWN, // 11177..1117F 8371 SHARADA, // 11180..111DF 8372 UNKNOWN, // 111E0 8373 SINHALA, // 111E1..111F4 8374 UNKNOWN, // 111F5..111FF 8375 KHOJKI, // 11200..11211 8376 UNKNOWN, // 11212 8377 KHOJKI, // 11213..11241 8378 UNKNOWN, // 11242..1127F 8379 MULTANI, // 11280..11286 8380 UNKNOWN, // 11287 8381 MULTANI, // 11288 8382 UNKNOWN, // 11289 8383 MULTANI, // 1128A..1128D 8384 UNKNOWN, // 1128E 8385 MULTANI, // 1128F..1129D 8386 UNKNOWN, // 1129E 8387 MULTANI, // 1129F..112A9 8388 UNKNOWN, // 112AA..112AF 8389 KHUDAWADI, // 112B0..112EA 8390 UNKNOWN, // 112EB..112EF 8391 KHUDAWADI, // 112F0..112F9 8392 UNKNOWN, // 112FA..112FF 8393 GRANTHA, // 11300..11303 8394 UNKNOWN, // 11304 8395 GRANTHA, // 11305..1130C 8396 UNKNOWN, // 1130D..1130E 8397 GRANTHA, // 1130F..11310 8398 UNKNOWN, // 11311..11312 8399 GRANTHA, // 11313..11328 8400 UNKNOWN, // 11329 8401 GRANTHA, // 1132A..11330 8402 UNKNOWN, // 11331 8403 GRANTHA, // 11332..11333 8404 UNKNOWN, // 11334 8405 GRANTHA, // 11335..11339 8406 UNKNOWN, // 1133A 8407 INHERITED, // 1133B 8408 GRANTHA, // 1133C..11344 8409 UNKNOWN, // 11345..11346 8410 GRANTHA, // 11347..11348 8411 UNKNOWN, // 11349..1134A 8412 GRANTHA, // 1134B..1134D 8413 UNKNOWN, // 1134E..1134F 8414 GRANTHA, // 11350 8415 UNKNOWN, // 11351..11356 8416 GRANTHA, // 11357 8417 UNKNOWN, // 11358..1135C 8418 GRANTHA, // 1135D..11363 8419 UNKNOWN, // 11364..11365 8420 GRANTHA, // 11366..1136C 8421 UNKNOWN, // 1136D..1136F 8422 GRANTHA, // 11370..11374 8423 UNKNOWN, // 11375..1137F 8424 TULU_TIGALARI, // 11380..11389 8425 UNKNOWN, // 1138A 8426 TULU_TIGALARI, // 1138B 8427 UNKNOWN, // 1138C..1138D 8428 TULU_TIGALARI, // 1138E 8429 UNKNOWN, // 1138F 8430 TULU_TIGALARI, // 11390..113B5 8431 
UNKNOWN, // 113B6 8432 TULU_TIGALARI, // 113B7..113C0 8433 UNKNOWN, // 113C1 8434 TULU_TIGALARI, // 113C2 8435 UNKNOWN, // 113C3..113C4 8436 TULU_TIGALARI, // 113C5 8437 UNKNOWN, // 113C6 8438 TULU_TIGALARI, // 113C7..113CA 8439 UNKNOWN, // 113CB 8440 TULU_TIGALARI, // 113CC..113D5 8441 UNKNOWN, // 113D6 8442 TULU_TIGALARI, // 113D7..113D8 8443 UNKNOWN, // 113D9..113E0 8444 TULU_TIGALARI, // 113E1..113E2 8445 UNKNOWN, // 113E3..113FF 8446 NEWA, // 11400..1145B 8447 UNKNOWN, // 1145C 8448 NEWA, // 1145D..11461 8449 UNKNOWN, // 11462..1147F 8450 TIRHUTA, // 11480..114C7 8451 UNKNOWN, // 114C8..114CF 8452 TIRHUTA, // 114D0..114D9 8453 UNKNOWN, // 114DA..1157F 8454 SIDDHAM, // 11580..115B5 8455 UNKNOWN, // 115B6..115B7 8456 SIDDHAM, // 115B8..115DD 8457 UNKNOWN, // 115DE..115FF 8458 MODI, // 11600..11644 8459 UNKNOWN, // 11645..1164F 8460 MODI, // 11650..11659 8461 UNKNOWN, // 1165A..1165F 8462 MONGOLIAN, // 11660..1166C 8463 UNKNOWN, // 1166D..1167F 8464 TAKRI, // 11680..116B9 8465 UNKNOWN, // 116BA..116BF 8466 TAKRI, // 116C0..116C9 8467 UNKNOWN, // 116CA..116CF 8468 MYANMAR, // 116D0..116E3 8469 UNKNOWN, // 116E4..116FF 8470 AHOM, // 11700..1171A 8471 UNKNOWN, // 1171B..1171C 8472 AHOM, // 1171D..1172B 8473 UNKNOWN, // 1172C..1172F 8474 AHOM, // 11730..11746 8475 UNKNOWN, // 11747..117FF 8476 DOGRA, // 11800..1183B 8477 UNKNOWN, // 1183C..1189F 8478 WARANG_CITI, // 118A0..118F2 8479 UNKNOWN, // 118F3..118FE 8480 WARANG_CITI, // 118FF 8481 DIVES_AKURU, // 11900..11906 8482 UNKNOWN, // 11907..11908 8483 DIVES_AKURU, // 11909 8484 UNKNOWN, // 1190A..1190B 8485 DIVES_AKURU, // 1190C..11913 8486 UNKNOWN, // 11914 8487 DIVES_AKURU, // 11915..11916 8488 UNKNOWN, // 11917 8489 DIVES_AKURU, // 11918..11935 8490 UNKNOWN, // 11936 8491 DIVES_AKURU, // 11937..11938 8492 UNKNOWN, // 11939..1193A 8493 DIVES_AKURU, // 1193B..11946 8494 UNKNOWN, // 11947..1194F 8495 DIVES_AKURU, // 11950..11959 8496 UNKNOWN, // 1195A..1199F 8497 NANDINAGARI, // 119A0..119A7 8498 UNKNOWN, // 
119A8..119A9 8499 NANDINAGARI, // 119AA..119D7 8500 UNKNOWN, // 119D8..119D9 8501 NANDINAGARI, // 119DA..119E4 8502 UNKNOWN, // 119E5..119FF 8503 ZANABAZAR_SQUARE, // 11A00..11A47 8504 UNKNOWN, // 11A48..11A4F 8505 SOYOMBO, // 11A50..11AA2 8506 UNKNOWN, // 11AA3..11AAF 8507 CANADIAN_ABORIGINAL, // 11AB0..11ABF 8508 PAU_CIN_HAU, // 11AC0..11AF8 8509 UNKNOWN, // 11AF9..11AFF 8510 DEVANAGARI, // 11B00..11B09 8511 UNKNOWN, // 11B0A..11BBF 8512 SUNUWAR, // 11BC0..11BE1 8513 UNKNOWN, // 11BE2..11BEF 8514 SUNUWAR, // 11BF0..11BF9 8515 UNKNOWN, // 11BFA..11BFF 8516 BHAIKSUKI, // 11C00..11C08 8517 UNKNOWN, // 11C09 8518 BHAIKSUKI, // 11C0A..11C36 8519 UNKNOWN, // 11C37 8520 BHAIKSUKI, // 11C38..11C45 8521 UNKNOWN, // 11C46..11C4F 8522 BHAIKSUKI, // 11C50..11C6C 8523 UNKNOWN, // 11C6D..11C6F 8524 MARCHEN, // 11C70..11C8F 8525 UNKNOWN, // 11C90..11C91 8526 MARCHEN, // 11C92..11CA7 8527 UNKNOWN, // 11CA8 8528 MARCHEN, // 11CA9..11CB6 8529 UNKNOWN, // 11CB7..11CFF 8530 MASARAM_GONDI, // 11D00..11D06 8531 UNKNOWN, // 11D07 8532 MASARAM_GONDI, // 11D08..11D09 8533 UNKNOWN, // 11D0A 8534 MASARAM_GONDI, // 11D0B..11D36 8535 UNKNOWN, // 11D37..11D39 8536 MASARAM_GONDI, // 11D3A 8537 UNKNOWN, // 11D3B 8538 MASARAM_GONDI, // 11D3C..11D3D 8539 UNKNOWN, // 11D3E 8540 MASARAM_GONDI, // 11D3F..11D47 8541 UNKNOWN, // 11D48..11D4F 8542 MASARAM_GONDI, // 11D50..11D59 8543 UNKNOWN, // 11D5A..11D5F 8544 GUNJALA_GONDI, // 11D60..11D65 8545 UNKNOWN, // 11D66 8546 GUNJALA_GONDI, // 11D67..11D68 8547 UNKNOWN, // 11D69 8548 GUNJALA_GONDI, // 11D6A..11D8E 8549 UNKNOWN, // 11D8F 8550 GUNJALA_GONDI, // 11D90..11D91 8551 UNKNOWN, // 11D92 8552 GUNJALA_GONDI, // 11D93..11D98 8553 UNKNOWN, // 11D99..11D9F 8554 GUNJALA_GONDI, // 11DA0..11DA9 8555 UNKNOWN, // 11DAA..11EDF 8556 MAKASAR, // 11EE0..11EF8 8557 UNKNOWN, // 11EF9..11EFF 8558 KAWI, // 11F00..11F10 8559 UNKNOWN, // 11F11 8560 KAWI, // 11F12..11F3A 8561 UNKNOWN, // 11F3B..11F3D 8562 KAWI, // 11F3E..11F5A 8563 UNKNOWN, // 11F5B..11FAF 8564 LISU, // 
11FB0 8565 UNKNOWN, // 11FB1..11FBF 8566 TAMIL, // 11FC0..11FF1 8567 UNKNOWN, // 11FF2..11FFE 8568 TAMIL, // 11FFF 8569 CUNEIFORM, // 12000..12399 8570 UNKNOWN, // 1239A..123FF 8571 CUNEIFORM, // 12400..1246E 8572 UNKNOWN, // 1246F 8573 CUNEIFORM, // 12470..12474 8574 UNKNOWN, // 12475..1247F 8575 CUNEIFORM, // 12480..12543 8576 UNKNOWN, // 12544..12F8F 8577 CYPRO_MINOAN, // 12F90..12FF2 8578 UNKNOWN, // 12FF3..12FFF 8579 EGYPTIAN_HIEROGLYPHS, // 13000..13455 8580 UNKNOWN, // 13456..1345F 8581 EGYPTIAN_HIEROGLYPHS, // 13460..143FA 8582 UNKNOWN, // 143FB..143FF 8583 ANATOLIAN_HIEROGLYPHS, // 14400..14646 8584 UNKNOWN, // 14647..160FF 8585 GURUNG_KHEMA, // 16100..16139 8586 UNKNOWN, // 1613A..167FF 8587 BAMUM, // 16800..16A38 8588 UNKNOWN, // 16A39..16A3F 8589 MRO, // 16A40..16A5E 8590 UNKNOWN, // 16A5F 8591 MRO, // 16A60..16A69 8592 UNKNOWN, // 16A6A..16A6D 8593 MRO, // 16A6E..16A6F 8594 TANGSA, // 16A70..16ABE 8595 UNKNOWN, // 16ABF 8596 TANGSA, // 16AC0..16AC9 8597 UNKNOWN, // 16ACA..16ACF 8598 BASSA_VAH, // 16AD0..16AED 8599 UNKNOWN, // 16AEE..16AEF 8600 BASSA_VAH, // 16AF0..16AF5 8601 UNKNOWN, // 16AF6..16AFF 8602 PAHAWH_HMONG, // 16B00..16B45 8603 UNKNOWN, // 16B46..16B4F 8604 PAHAWH_HMONG, // 16B50..16B59 8605 UNKNOWN, // 16B5A 8606 PAHAWH_HMONG, // 16B5B..16B61 8607 UNKNOWN, // 16B62 8608 PAHAWH_HMONG, // 16B63..16B77 8609 UNKNOWN, // 16B78..16B7C 8610 PAHAWH_HMONG, // 16B7D..16B8F 8611 UNKNOWN, // 16B90..16D3F 8612 KIRAT_RAI, // 16D40..16D79 8613 UNKNOWN, // 16D7A..16E3F 8614 MEDEFAIDRIN, // 16E40..16E9A 8615 UNKNOWN, // 16E9B..16EFF 8616 MIAO, // 16F00..16F4A 8617 UNKNOWN, // 16F4B..16F4E 8618 MIAO, // 16F4F..16F87 8619 UNKNOWN, // 16F88..16F8E 8620 MIAO, // 16F8F..16F9F 8621 UNKNOWN, // 16FA0..16FDF 8622 TANGUT, // 16FE0 8623 NUSHU, // 16FE1 8624 HAN, // 16FE2..16FE3 8625 KHITAN_SMALL_SCRIPT, // 16FE4 8626 UNKNOWN, // 16FE5..16FEF 8627 HAN, // 16FF0..16FF1 8628 UNKNOWN, // 16FF2..16FFF 8629 TANGUT, // 17000..187F7 8630 UNKNOWN, // 187F8..187FF 8631 TANGUT, 
// 18800..18AFF 8632 KHITAN_SMALL_SCRIPT, // 18B00..18CD5 8633 UNKNOWN, // 18CD6..18CFE 8634 KHITAN_SMALL_SCRIPT, // 18CFF 8635 TANGUT, // 18D00..18D08 8636 UNKNOWN, // 18D09..1AFEF 8637 KATAKANA, // 1AFF0..1AFF3 8638 UNKNOWN, // 1AFF4 8639 KATAKANA, // 1AFF5..1AFFB 8640 UNKNOWN, // 1AFFC 8641 KATAKANA, // 1AFFD..1AFFE 8642 UNKNOWN, // 1AFFF 8643 KATAKANA, // 1B000 8644 HIRAGANA, // 1B001..1B11F 8645 KATAKANA, // 1B120..1B122 8646 UNKNOWN, // 1B123..1B131 8647 HIRAGANA, // 1B132 8648 UNKNOWN, // 1B133..1B14F 8649 HIRAGANA, // 1B150..1B152 8650 UNKNOWN, // 1B153..1B154 8651 KATAKANA, // 1B155 8652 UNKNOWN, // 1B156..1B163 8653 KATAKANA, // 1B164..1B167 8654 UNKNOWN, // 1B168..1B16F 8655 NUSHU, // 1B170..1B2FB 8656 UNKNOWN, // 1B2FC..1BBFF 8657 DUPLOYAN, // 1BC00..1BC6A 8658 UNKNOWN, // 1BC6B..1BC6F 8659 DUPLOYAN, // 1BC70..1BC7C 8660 UNKNOWN, // 1BC7D..1BC7F 8661 DUPLOYAN, // 1BC80..1BC88 8662 UNKNOWN, // 1BC89..1BC8F 8663 DUPLOYAN, // 1BC90..1BC99 8664 UNKNOWN, // 1BC9A..1BC9B 8665 DUPLOYAN, // 1BC9C..1BC9F 8666 COMMON, // 1BCA0..1BCA3 8667 UNKNOWN, // 1BCA4..1CBFF 8668 COMMON, // 1CC00..1CCF9 8669 UNKNOWN, // 1CCFA..1CCFF 8670 COMMON, // 1CD00..1CEB3 8671 UNKNOWN, // 1CEB4..1CEFF 8672 INHERITED, // 1CF00..1CF2D 8673 UNKNOWN, // 1CF2E..1CF2F 8674 INHERITED, // 1CF30..1CF46 8675 UNKNOWN, // 1CF47..1CF4F 8676 COMMON, // 1CF50..1CFC3 8677 UNKNOWN, // 1CFC4..1CFFF 8678 COMMON, // 1D000..1D0F5 8679 UNKNOWN, // 1D0F6..1D0FF 8680 COMMON, // 1D100..1D126 8681 UNKNOWN, // 1D127..1D128 8682 COMMON, // 1D129..1D166 8683 INHERITED, // 1D167..1D169 8684 COMMON, // 1D16A..1D17A 8685 INHERITED, // 1D17B..1D182 8686 COMMON, // 1D183..1D184 8687 INHERITED, // 1D185..1D18B 8688 COMMON, // 1D18C..1D1A9 8689 INHERITED, // 1D1AA..1D1AD 8690 COMMON, // 1D1AE..1D1EA 8691 UNKNOWN, // 1D1EB..1D1FF 8692 GREEK, // 1D200..1D245 8693 UNKNOWN, // 1D246..1D2BF 8694 COMMON, // 1D2C0..1D2D3 8695 UNKNOWN, // 1D2D4..1D2DF 8696 COMMON, // 1D2E0..1D2F3 8697 UNKNOWN, // 1D2F4..1D2FF 8698 COMMON, // 
1D300..1D356 8699 UNKNOWN, // 1D357..1D35F 8700 COMMON, // 1D360..1D378 8701 UNKNOWN, // 1D379..1D3FF 8702 COMMON, // 1D400..1D454 8703 UNKNOWN, // 1D455 8704 COMMON, // 1D456..1D49C 8705 UNKNOWN, // 1D49D 8706 COMMON, // 1D49E..1D49F 8707 UNKNOWN, // 1D4A0..1D4A1 8708 COMMON, // 1D4A2 8709 UNKNOWN, // 1D4A3..1D4A4 8710 COMMON, // 1D4A5..1D4A6 8711 UNKNOWN, // 1D4A7..1D4A8 8712 COMMON, // 1D4A9..1D4AC 8713 UNKNOWN, // 1D4AD 8714 COMMON, // 1D4AE..1D4B9 8715 UNKNOWN, // 1D4BA 8716 COMMON, // 1D4BB 8717 UNKNOWN, // 1D4BC 8718 COMMON, // 1D4BD..1D4C3 8719 UNKNOWN, // 1D4C4 8720 COMMON, // 1D4C5..1D505 8721 UNKNOWN, // 1D506 8722 COMMON, // 1D507..1D50A 8723 UNKNOWN, // 1D50B..1D50C 8724 COMMON, // 1D50D..1D514 8725 UNKNOWN, // 1D515 8726 COMMON, // 1D516..1D51C 8727 UNKNOWN, // 1D51D 8728 COMMON, // 1D51E..1D539 8729 UNKNOWN, // 1D53A 8730 COMMON, // 1D53B..1D53E 8731 UNKNOWN, // 1D53F 8732 COMMON, // 1D540..1D544 8733 UNKNOWN, // 1D545 8734 COMMON, // 1D546 8735 UNKNOWN, // 1D547..1D549 8736 COMMON, // 1D54A..1D550 8737 UNKNOWN, // 1D551 8738 COMMON, // 1D552..1D6A5 8739 UNKNOWN, // 1D6A6..1D6A7 8740 COMMON, // 1D6A8..1D7CB 8741 UNKNOWN, // 1D7CC..1D7CD 8742 COMMON, // 1D7CE..1D7FF 8743 SIGNWRITING, // 1D800..1DA8B 8744 UNKNOWN, // 1DA8C..1DA9A 8745 SIGNWRITING, // 1DA9B..1DA9F 8746 UNKNOWN, // 1DAA0 8747 SIGNWRITING, // 1DAA1..1DAAF 8748 UNKNOWN, // 1DAB0..1DEFF 8749 LATIN, // 1DF00..1DF1E 8750 UNKNOWN, // 1DF1F..1DF24 8751 LATIN, // 1DF25..1DF2A 8752 UNKNOWN, // 1DF2B..1DFFF 8753 GLAGOLITIC, // 1E000..1E006 8754 UNKNOWN, // 1E007 8755 GLAGOLITIC, // 1E008..1E018 8756 UNKNOWN, // 1E019..1E01A 8757 GLAGOLITIC, // 1E01B..1E021 8758 UNKNOWN, // 1E022 8759 GLAGOLITIC, // 1E023..1E024 8760 UNKNOWN, // 1E025 8761 GLAGOLITIC, // 1E026..1E02A 8762 UNKNOWN, // 1E02B..1E02F 8763 CYRILLIC, // 1E030..1E06D 8764 UNKNOWN, // 1E06E..1E08E 8765 CYRILLIC, // 1E08F 8766 UNKNOWN, // 1E090..1E0FF 8767 NYIAKENG_PUACHUE_HMONG, // 1E100..1E12C 8768 UNKNOWN, // 1E12D..1E12F 8769 
NYIAKENG_PUACHUE_HMONG, // 1E130..1E13D 8770 UNKNOWN, // 1E13E..1E13F 8771 NYIAKENG_PUACHUE_HMONG, // 1E140..1E149 8772 UNKNOWN, // 1E14A..1E14D 8773 NYIAKENG_PUACHUE_HMONG, // 1E14E..1E14F 8774 UNKNOWN, // 1E150..1E28F 8775 TOTO, // 1E290..1E2AE 8776 UNKNOWN, // 1E2AF..1E2BF 8777 WANCHO, // 1E2C0..1E2F9 8778 UNKNOWN, // 1E2FA..1E2FE 8779 WANCHO, // 1E2FF 8780 UNKNOWN, // 1E300..1E4CF 8781 NAG_MUNDARI, // 1E4D0..1E4F9 8782 UNKNOWN, // 1E4FA..1E5CF 8783 OL_ONAL, // 1E5D0..1E5FA 8784 UNKNOWN, // 1E5FB..1E5FE 8785 OL_ONAL, // 1E5FF 8786 UNKNOWN, // 1E600..1E7DF 8787 ETHIOPIC, // 1E7E0..1E7E6 8788 UNKNOWN, // 1E7E7 8789 ETHIOPIC, // 1E7E8..1E7EB 8790 UNKNOWN, // 1E7EC 8791 ETHIOPIC, // 1E7ED..1E7EE 8792 UNKNOWN, // 1E7EF 8793 ETHIOPIC, // 1E7F0..1E7FE 8794 UNKNOWN, // 1E7FF 8795 MENDE_KIKAKUI, // 1E800..1E8C4 8796 UNKNOWN, // 1E8C5..1E8C6 8797 MENDE_KIKAKUI, // 1E8C7..1E8D6 8798 UNKNOWN, // 1E8D7..1E8FF 8799 ADLAM, // 1E900..1E94B 8800 UNKNOWN, // 1E94C..1E94F 8801 ADLAM, // 1E950..1E959 8802 UNKNOWN, // 1E95A..1E95D 8803 ADLAM, // 1E95E..1E95F 8804 UNKNOWN, // 1E960..1EC70 8805 COMMON, // 1EC71..1ECB4 8806 UNKNOWN, // 1ECB5..1ED00 8807 COMMON, // 1ED01..1ED3D 8808 UNKNOWN, // 1ED3E..1EDFF 8809 ARABIC, // 1EE00..1EE03 8810 UNKNOWN, // 1EE04 8811 ARABIC, // 1EE05..1EE1F 8812 UNKNOWN, // 1EE20 8813 ARABIC, // 1EE21..1EE22 8814 UNKNOWN, // 1EE23 8815 ARABIC, // 1EE24 8816 UNKNOWN, // 1EE25..1EE26 8817 ARABIC, // 1EE27 8818 UNKNOWN, // 1EE28 8819 ARABIC, // 1EE29..1EE32 8820 UNKNOWN, // 1EE33 8821 ARABIC, // 1EE34..1EE37 8822 UNKNOWN, // 1EE38 8823 ARABIC, // 1EE39 8824 UNKNOWN, // 1EE3A 8825 ARABIC, // 1EE3B 8826 UNKNOWN, // 1EE3C..1EE41 8827 ARABIC, // 1EE42 8828 UNKNOWN, // 1EE43..1EE46 8829 ARABIC, // 1EE47 8830 UNKNOWN, // 1EE48 8831 ARABIC, // 1EE49 8832 UNKNOWN, // 1EE4A 8833 ARABIC, // 1EE4B 8834 UNKNOWN, // 1EE4C 8835 ARABIC, // 1EE4D..1EE4F 8836 UNKNOWN, // 1EE50 8837 ARABIC, // 1EE51..1EE52 8838 UNKNOWN, // 1EE53 8839 ARABIC, // 1EE54 8840 UNKNOWN, // 
1EE55..1EE56 8841 ARABIC, // 1EE57 8842 UNKNOWN, // 1EE58 8843 ARABIC, // 1EE59 8844 UNKNOWN, // 1EE5A 8845 ARABIC, // 1EE5B 8846 UNKNOWN, // 1EE5C 8847 ARABIC, // 1EE5D 8848 UNKNOWN, // 1EE5E 8849 ARABIC, // 1EE5F 8850 UNKNOWN, // 1EE60 8851 ARABIC, // 1EE61..1EE62 8852 UNKNOWN, // 1EE63 8853 ARABIC, // 1EE64 8854 UNKNOWN, // 1EE65..1EE66 8855 ARABIC, // 1EE67..1EE6A 8856 UNKNOWN, // 1EE6B 8857 ARABIC, // 1EE6C..1EE72 8858 UNKNOWN, // 1EE73 8859 ARABIC, // 1EE74..1EE77 8860 UNKNOWN, // 1EE78 8861 ARABIC, // 1EE79..1EE7C 8862 UNKNOWN, // 1EE7D 8863 ARABIC, // 1EE7E 8864 UNKNOWN, // 1EE7F 8865 ARABIC, // 1EE80..1EE89 8866 UNKNOWN, // 1EE8A 8867 ARABIC, // 1EE8B..1EE9B 8868 UNKNOWN, // 1EE9C..1EEA0 8869 ARABIC, // 1EEA1..1EEA3 8870 UNKNOWN, // 1EEA4 8871 ARABIC, // 1EEA5..1EEA9 8872 UNKNOWN, // 1EEAA 8873 ARABIC, // 1EEAB..1EEBB 8874 UNKNOWN, // 1EEBC..1EEEF 8875 ARABIC, // 1EEF0..1EEF1 8876 UNKNOWN, // 1EEF2..1EFFF 8877 COMMON, // 1F000..1F02B 8878 UNKNOWN, // 1F02C..1F02F 8879 COMMON, // 1F030..1F093 8880 UNKNOWN, // 1F094..1F09F 8881 COMMON, // 1F0A0..1F0AE 8882 UNKNOWN, // 1F0AF..1F0B0 8883 COMMON, // 1F0B1..1F0BF 8884 UNKNOWN, // 1F0C0 8885 COMMON, // 1F0C1..1F0CF 8886 UNKNOWN, // 1F0D0 8887 COMMON, // 1F0D1..1F0F5 8888 UNKNOWN, // 1F0F6..1F0FF 8889 COMMON, // 1F100..1F1AD 8890 UNKNOWN, // 1F1AE..1F1E5 8891 COMMON, // 1F1E6..1F1FF 8892 HIRAGANA, // 1F200 8893 COMMON, // 1F201..1F202 8894 UNKNOWN, // 1F203..1F20F 8895 COMMON, // 1F210..1F23B 8896 UNKNOWN, // 1F23C..1F23F 8897 COMMON, // 1F240..1F248 8898 UNKNOWN, // 1F249..1F24F 8899 COMMON, // 1F250..1F251 8900 UNKNOWN, // 1F252..1F25F 8901 COMMON, // 1F260..1F265 8902 UNKNOWN, // 1F266..1F2FF 8903 COMMON, // 1F300..1F6D7 8904 UNKNOWN, // 1F6D8..1F6DB 8905 COMMON, // 1F6DC..1F6EC 8906 UNKNOWN, // 1F6ED..1F6EF 8907 COMMON, // 1F6F0..1F6FC 8908 UNKNOWN, // 1F6FD..1F6FF 8909 COMMON, // 1F700..1F776 8910 UNKNOWN, // 1F777..1F77A 8911 COMMON, // 1F77B..1F7D9 8912 UNKNOWN, // 1F7DA..1F7DF 8913 COMMON, // 1F7E0..1F7EB 
8914 UNKNOWN, // 1F7EC..1F7EF 8915 COMMON, // 1F7F0 8916 UNKNOWN, // 1F7F1..1F7FF 8917 COMMON, // 1F800..1F80B 8918 UNKNOWN, // 1F80C..1F80F 8919 COMMON, // 1F810..1F847 8920 UNKNOWN, // 1F848..1F84F 8921 COMMON, // 1F850..1F859 8922 UNKNOWN, // 1F85A..1F85F 8923 COMMON, // 1F860..1F887 8924 UNKNOWN, // 1F888..1F88F 8925 COMMON, // 1F890..1F8AD 8926 UNKNOWN, // 1F8AE..1F8AF 8927 COMMON, // 1F8B0..1F8BB 8928 UNKNOWN, // 1F8BC..1F8BF 8929 COMMON, // 1F8C0..1F8C1 8930 UNKNOWN, // 1F8C2..1F8FF 8931 COMMON, // 1F900..1FA53 8932 UNKNOWN, // 1FA54..1FA5F 8933 COMMON, // 1FA60..1FA6D 8934 UNKNOWN, // 1FA6E..1FA6F 8935 COMMON, // 1FA70..1FA7C 8936 UNKNOWN, // 1FA7D..1FA7F 8937 COMMON, // 1FA80..1FA89 8938 UNKNOWN, // 1FA8A..1FA8E 8939 COMMON, // 1FA8F..1FAC6 8940 UNKNOWN, // 1FAC7..1FACD 8941 COMMON, // 1FACE..1FADC 8942 UNKNOWN, // 1FADD..1FADE 8943 COMMON, // 1FADF..1FAE9 8944 UNKNOWN, // 1FAEA..1FAEF 8945 COMMON, // 1FAF0..1FAF8 8946 UNKNOWN, // 1FAF9..1FAFF 8947 COMMON, // 1FB00..1FB92 8948 UNKNOWN, // 1FB93 8949 COMMON, // 1FB94..1FBF9 8950 UNKNOWN, // 1FBFA..1FFFF 8951 HAN, // 20000..2A6DF 8952 UNKNOWN, // 2A6E0..2A6FF 8953 HAN, // 2A700..2B739 8954 UNKNOWN, // 2B73A..2B73F 8955 HAN, // 2B740..2B81D 8956 UNKNOWN, // 2B81E..2B81F 8957 HAN, // 2B820..2CEA1 8958 UNKNOWN, // 2CEA2..2CEAF 8959 HAN, // 2CEB0..2EBE0 8960 UNKNOWN, // 2EBE1..2EBEF 8961 HAN, // 2EBF0..2EE5D 8962 UNKNOWN, // 2EE5E..2F7FF 8963 HAN, // 2F800..2FA1D 8964 UNKNOWN, // 2FA1E..2FFFF 8965 HAN, // 30000..3134A 8966 UNKNOWN, // 3134B..3134F 8967 HAN, // 31350..323AF 8968 UNKNOWN, // 323B0..E0000 8969 COMMON, // E0001 8970 UNKNOWN, // E0002..E001F 8971 COMMON, // E0020..E007F 8972 UNKNOWN, // E0080..E00FF 8973 INHERITED, // E0100..E01EF 8974 UNKNOWN, // E01F0..10FFFF 8975 }; 8976 8977 private static final HashMap<String, Character.UnicodeScript> aliases; 8978 static { 8979 aliases = HashMap.newHashMap(UNKNOWN.ordinal() + 1); 8980 aliases.put("ADLM", ADLAM); 8981 aliases.put("AGHB", CAUCASIAN_ALBANIAN); 
8982 aliases.put("AHOM", AHOM); 8983 aliases.put("ARAB", ARABIC); 8984 aliases.put("ARMI", IMPERIAL_ARAMAIC); 8985 aliases.put("ARMN", ARMENIAN); 8986 aliases.put("AVST", AVESTAN); 8987 aliases.put("BALI", BALINESE); 8988 aliases.put("BAMU", BAMUM); 8989 aliases.put("BASS", BASSA_VAH); 8990 aliases.put("BATK", BATAK); 8991 aliases.put("BENG", BENGALI); 8992 aliases.put("BHKS", BHAIKSUKI); 8993 aliases.put("BOPO", BOPOMOFO); 8994 aliases.put("BRAH", BRAHMI); 8995 aliases.put("BRAI", BRAILLE); 8996 aliases.put("BUGI", BUGINESE); 8997 aliases.put("BUHD", BUHID); 8998 aliases.put("CAKM", CHAKMA); 8999 aliases.put("CANS", CANADIAN_ABORIGINAL); 9000 aliases.put("CARI", CARIAN); 9001 aliases.put("CHAM", CHAM); 9002 aliases.put("CHER", CHEROKEE); 9003 aliases.put("CHRS", CHORASMIAN); 9004 aliases.put("COPT", COPTIC); 9005 aliases.put("CPMN", CYPRO_MINOAN); 9006 aliases.put("CPRT", CYPRIOT); 9007 aliases.put("CYRL", CYRILLIC); 9008 aliases.put("DEVA", DEVANAGARI); 9009 aliases.put("DIAK", DIVES_AKURU); 9010 aliases.put("DOGR", DOGRA); 9011 aliases.put("DSRT", DESERET); 9012 aliases.put("DUPL", DUPLOYAN); 9013 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 9014 aliases.put("ELBA", ELBASAN); 9015 aliases.put("ELYM", ELYMAIC); 9016 aliases.put("ETHI", ETHIOPIC); 9017 aliases.put("GARA", GARAY); 9018 aliases.put("GEOR", GEORGIAN); 9019 aliases.put("GLAG", GLAGOLITIC); 9020 aliases.put("GONG", GUNJALA_GONDI); 9021 aliases.put("GONM", MASARAM_GONDI); 9022 aliases.put("GOTH", GOTHIC); 9023 aliases.put("GRAN", GRANTHA); 9024 aliases.put("GREK", GREEK); 9025 aliases.put("GUJR", GUJARATI); 9026 aliases.put("GUKH", GURUNG_KHEMA); 9027 aliases.put("GURU", GURMUKHI); 9028 aliases.put("HANG", HANGUL); 9029 aliases.put("HANI", HAN); 9030 aliases.put("HANO", HANUNOO); 9031 aliases.put("HATR", HATRAN); 9032 aliases.put("HEBR", HEBREW); 9033 aliases.put("HIRA", HIRAGANA); 9034 aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS); 9035 aliases.put("HMNG", PAHAWH_HMONG); 9036 aliases.put("HMNP", 
NYIAKENG_PUACHUE_HMONG); 9037 aliases.put("HUNG", OLD_HUNGARIAN); 9038 aliases.put("ITAL", OLD_ITALIC); 9039 aliases.put("JAVA", JAVANESE); 9040 aliases.put("KALI", KAYAH_LI); 9041 aliases.put("KANA", KATAKANA); 9042 aliases.put("KAWI", KAWI); 9043 aliases.put("KHAR", KHAROSHTHI); 9044 aliases.put("KHMR", KHMER); 9045 aliases.put("KHOJ", KHOJKI); 9046 aliases.put("KITS", KHITAN_SMALL_SCRIPT); 9047 aliases.put("KNDA", KANNADA); 9048 aliases.put("KRAI", KIRAT_RAI); 9049 aliases.put("KTHI", KAITHI); 9050 aliases.put("LANA", TAI_THAM); 9051 aliases.put("LAOO", LAO); 9052 aliases.put("LATN", LATIN); 9053 aliases.put("LEPC", LEPCHA); 9054 aliases.put("LIMB", LIMBU); 9055 aliases.put("LINA", LINEAR_A); 9056 aliases.put("LINB", LINEAR_B); 9057 aliases.put("LISU", LISU); 9058 aliases.put("LYCI", LYCIAN); 9059 aliases.put("LYDI", LYDIAN); 9060 aliases.put("MAHJ", MAHAJANI); 9061 aliases.put("MAKA", MAKASAR); 9062 aliases.put("MAND", MANDAIC); 9063 aliases.put("MANI", MANICHAEAN); 9064 aliases.put("MARC", MARCHEN); 9065 aliases.put("MEDF", MEDEFAIDRIN); 9066 aliases.put("MEND", MENDE_KIKAKUI); 9067 aliases.put("MERC", MEROITIC_CURSIVE); 9068 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 9069 aliases.put("MLYM", MALAYALAM); 9070 aliases.put("MODI", MODI); 9071 aliases.put("MONG", MONGOLIAN); 9072 aliases.put("MROO", MRO); 9073 aliases.put("MTEI", MEETEI_MAYEK); 9074 aliases.put("MULT", MULTANI); 9075 aliases.put("MYMR", MYANMAR); 9076 aliases.put("NAGM", NAG_MUNDARI); 9077 aliases.put("NAND", NANDINAGARI); 9078 aliases.put("NARB", OLD_NORTH_ARABIAN); 9079 aliases.put("NBAT", NABATAEAN); 9080 aliases.put("NEWA", NEWA); 9081 aliases.put("NKOO", NKO); 9082 aliases.put("NSHU", NUSHU); 9083 aliases.put("OGAM", OGHAM); 9084 aliases.put("OLCK", OL_CHIKI); 9085 aliases.put("ONAO", OL_ONAL); 9086 aliases.put("ORKH", OLD_TURKIC); 9087 aliases.put("ORYA", ORIYA); 9088 aliases.put("OSGE", OSAGE); 9089 aliases.put("OSMA", OSMANYA); 9090 aliases.put("OUGR", OLD_UYGHUR); 9091 
aliases.put("PALM", PALMYRENE); 9092 aliases.put("PAUC", PAU_CIN_HAU); 9093 aliases.put("PERM", OLD_PERMIC); 9094 aliases.put("PHAG", PHAGS_PA); 9095 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 9096 aliases.put("PHLP", PSALTER_PAHLAVI); 9097 aliases.put("PHNX", PHOENICIAN); 9098 aliases.put("PLRD", MIAO); 9099 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 9100 aliases.put("RJNG", REJANG); 9101 aliases.put("ROHG", HANIFI_ROHINGYA); 9102 aliases.put("RUNR", RUNIC); 9103 aliases.put("SAMR", SAMARITAN); 9104 aliases.put("SARB", OLD_SOUTH_ARABIAN); 9105 aliases.put("SAUR", SAURASHTRA); 9106 aliases.put("SGNW", SIGNWRITING); 9107 aliases.put("SHAW", SHAVIAN); 9108 aliases.put("SHRD", SHARADA); 9109 aliases.put("SIDD", SIDDHAM); 9110 aliases.put("SIND", KHUDAWADI); 9111 aliases.put("SINH", SINHALA); 9112 aliases.put("SOGD", SOGDIAN); 9113 aliases.put("SOGO", OLD_SOGDIAN); 9114 aliases.put("SORA", SORA_SOMPENG); 9115 aliases.put("SOYO", SOYOMBO); 9116 aliases.put("SUND", SUNDANESE); 9117 aliases.put("SUNU", SUNUWAR); 9118 aliases.put("SYLO", SYLOTI_NAGRI); 9119 aliases.put("SYRC", SYRIAC); 9120 aliases.put("TAGB", TAGBANWA); 9121 aliases.put("TAKR", TAKRI); 9122 aliases.put("TALE", TAI_LE); 9123 aliases.put("TALU", NEW_TAI_LUE); 9124 aliases.put("TAML", TAMIL); 9125 aliases.put("TANG", TANGUT); 9126 aliases.put("TAVT", TAI_VIET); 9127 aliases.put("TELU", TELUGU); 9128 aliases.put("TFNG", TIFINAGH); 9129 aliases.put("TGLG", TAGALOG); 9130 aliases.put("THAA", THAANA); 9131 aliases.put("THAI", THAI); 9132 aliases.put("TIBT", TIBETAN); 9133 aliases.put("TIRH", TIRHUTA); 9134 aliases.put("TNSA", TANGSA); 9135 aliases.put("TODR", TODHRI); 9136 aliases.put("TOTO", TOTO); 9137 aliases.put("TUTG", TULU_TIGALARI); 9138 aliases.put("UGAR", UGARITIC); 9139 aliases.put("VAII", VAI); 9140 aliases.put("VITH", VITHKUQI); 9141 aliases.put("WARA", WARANG_CITI); 9142 aliases.put("WCHO", WANCHO); 9143 aliases.put("XPEO", OLD_PERSIAN); 9144 aliases.put("XSUX", CUNEIFORM); 9145 
aliases.put("YEZI", YEZIDI); 9146 aliases.put("YIII", YI); 9147 aliases.put("ZANB", ZANABAZAR_SQUARE); 9148 aliases.put("ZINH", INHERITED); 9149 aliases.put("ZYYY", COMMON); 9150 aliases.put("ZZZZ", UNKNOWN); 9151 } 9152 9153 /** 9154 * Returns the enum constant representing the Unicode script of which 9155 * the given character (Unicode code point) is assigned to. 9156 * 9157 * @param codePoint the character (Unicode code point) in question. 9158 * @return The {@code UnicodeScript} constant representing the 9159 * Unicode script of which this character is assigned to. 9160 * 9161 * @throws IllegalArgumentException if the specified 9162 * {@code codePoint} is an invalid Unicode code point. 9163 * @see Character#isValidCodePoint(int) 9164 * 9165 */ 9166 public static UnicodeScript of(int codePoint) { 9167 if (!isValidCodePoint(codePoint)) 9168 throw new IllegalArgumentException( 9169 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9170 int type = getType(codePoint); 9171 // leave SURROGATE and PRIVATE_USE for table lookup 9172 if (type == UNASSIGNED) 9173 return UNKNOWN; 9174 int index = Arrays.binarySearch(scriptStarts, codePoint); 9175 if (index < 0) 9176 index = -index - 2; 9177 return scripts[index]; 9178 } 9179 9180 /** 9181 * Returns the UnicodeScript constant with the given Unicode script 9182 * name or the script name alias. Script names and their aliases are 9183 * determined by The Unicode Standard. The files {@code Scripts.txt} 9184 * and {@code PropertyValueAliases.txt} define script names 9185 * and the script name aliases for a particular version of the 9186 * standard. The {@link Character} class specifies the version of 9187 * the standard that it supports. 9188 * <p> 9189 * Character case is ignored for all of the valid script names. 9190 * The en_US locale's case mapping rules are used to provide 9191 * case-insensitive string comparisons for script name validation. 9192 * 9193 * @param scriptName A {@code UnicodeScript} name. 
9194 * @return The {@code UnicodeScript} constant identified 9195 * by {@code scriptName} 9196 * @throws IllegalArgumentException if {@code scriptName} is an 9197 * invalid name 9198 * @throws NullPointerException if {@code scriptName} is null 9199 */ 9200 public static final UnicodeScript forName(String scriptName) { 9201 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 9202 //.replace(' ', '_')); 9203 UnicodeScript sc = aliases.get(scriptName); 9204 if (sc != null) 9205 return sc; 9206 return valueOf(scriptName); 9207 } 9208 } 9209 9210 /** 9211 * The value of the {@code Character}. 9212 * 9213 * @serial 9214 */ 9215 private final char value; 9216 9217 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 9218 @java.io.Serial 9219 private static final long serialVersionUID = 3786198910865385080L; 9220 9221 /** 9222 * Constructs a newly allocated {@code Character} object that 9223 * represents the specified {@code char} value. 9224 * 9225 * @param value the value to be represented by the 9226 * {@code Character} object. 9227 * 9228 * @deprecated 9229 * It is rarely appropriate to use this constructor. The static factory 9230 * {@link #valueOf(char)} is generally a better choice, as it is 9231 * likely to yield significantly better space and time performance. 
9232 */ 9233 @Deprecated(since="9") 9234 public Character(char value) { 9235 this.value = value; 9236 } 9237 9238 private static final class CharacterCache { 9239 private CharacterCache(){} 9240 9241 @Stable 9242 static final Character[] cache; 9243 static Character[] archivedCache; 9244 9245 static { 9246 int size = 127 + 1; 9247 9248 // Load and use the archived cache if it exists 9249 CDS.initializeFromArchive(CharacterCache.class); 9250 if (archivedCache == null) { 9251 Character[] c = new Character[size]; 9252 for (int i = 0; i < size; i++) { 9253 c[i] = new Character((char) i); 9254 } 9255 archivedCache = c; 9256 } 9257 cache = archivedCache; 9258 assert cache.length == size; 9259 } 9260 } 9261 9262 /** 9263 * Returns a {@code Character} instance representing the specified 9264 * {@code char} value. 9265 * If a new {@code Character} instance is not required, this method 9266 * should generally be used in preference to the constructor 9267 * {@link #Character(char)}, as this method is likely to yield 9268 * significantly better space and time performance by caching 9269 * frequently requested values. 9270 * 9271 * This method will always cache values in the range {@code 9272 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 9273 * cache other values outside of this range. 9274 * 9275 * @param c a char value. 9276 * @return a {@code Character} instance representing {@code c}. 9277 * @since 1.5 9278 */ 9279 @IntrinsicCandidate 9280 public static Character valueOf(char c) { 9281 if (c <= 127) { // must cache 9282 return CharacterCache.cache[(int)c]; 9283 } 9284 return new Character(c); 9285 } 9286 9287 /** 9288 * Returns the value of this {@code Character} object. 9289 * @return the primitive {@code char} value represented by 9290 * this object. 
9291 */ 9292 @IntrinsicCandidate 9293 public char charValue() { 9294 return value; 9295 } 9296 9297 /** 9298 * Returns a hash code for this {@code Character}; equal to the result 9299 * of invoking {@code charValue()}. 9300 * 9301 * @return a hash code value for this {@code Character} 9302 */ 9303 @Override 9304 public int hashCode() { 9305 return Character.hashCode(value); 9306 } 9307 9308 /** 9309 * Returns a hash code for a {@code char} value; compatible with 9310 * {@code Character.hashCode()}. 9311 * 9312 * @since 1.8 9313 * 9314 * @param value The {@code char} for which to return a hash code. 9315 * @return a hash code value for a {@code char} value. 9316 */ 9317 public static int hashCode(char value) { 9318 return (int)value; 9319 } 9320 9321 /** 9322 * Compares this object against the specified object. 9323 * The result is {@code true} if and only if the argument is not 9324 * {@code null} and is a {@code Character} object that 9325 * represents the same {@code char} value as this object. 9326 * 9327 * @param obj the object to compare with. 9328 * @return {@code true} if the objects are the same; 9329 * {@code false} otherwise. 9330 */ 9331 public boolean equals(Object obj) { 9332 if (obj instanceof Character c) { 9333 return value == c.charValue(); 9334 } 9335 return false; 9336 } 9337 9338 /** 9339 * Returns a {@code String} object representing this 9340 * {@code Character}'s value. The result is a string of 9341 * length 1 whose sole component is the primitive 9342 * {@code char} value represented by this 9343 * {@code Character} object. 9344 * 9345 * @return a string representation of this object. 9346 */ 9347 @Override 9348 public String toString() { 9349 return String.valueOf(value); 9350 } 9351 9352 /** 9353 * Returns a {@code String} object representing the 9354 * specified {@code char}. The result is a string of length 9355 * 1 consisting solely of the specified {@code char}. 
9356 * 9357 * @apiNote This method cannot handle <a 9358 * href="#supplementary"> supplementary characters</a>. To support 9359 * all Unicode characters, including supplementary characters, use 9360 * the {@link #toString(int)} method. 9361 * 9362 * @param c the {@code char} to be converted 9363 * @return the string representation of the specified {@code char} 9364 * @since 1.4 9365 */ 9366 public static String toString(char c) { 9367 return String.valueOf(c); 9368 } 9369 9370 /** 9371 * Returns a {@code String} object representing the 9372 * specified character (Unicode code point). The result is a string of 9373 * length 1 or 2, consisting solely of the specified {@code codePoint}. 9374 * 9375 * @param codePoint the {@code codePoint} to be converted 9376 * @return the string representation of the specified {@code codePoint} 9377 * @throws IllegalArgumentException if the specified 9378 * {@code codePoint} is not a {@linkplain #isValidCodePoint 9379 * valid Unicode code point}. 9380 * @since 11 9381 */ 9382 public static String toString(int codePoint) { 9383 return String.valueOfCodePoint(codePoint); 9384 } 9385 9386 /** 9387 * Determines whether the specified code point is a valid 9388 * <a href="http://www.unicode.org/glossary/#code_point"> 9389 * Unicode code point value</a>. 9390 * 9391 * @param codePoint the Unicode code point to be tested 9392 * @return {@code true} if the specified code point value is between 9393 * {@link #MIN_CODE_POINT} and 9394 * {@link #MAX_CODE_POINT} inclusive; 9395 * {@code false} otherwise. 9396 * @since 1.5 9397 */ 9398 public static boolean isValidCodePoint(int codePoint) { 9399 // Optimized form of: 9400 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 9401 int plane = codePoint >>> 16; 9402 return plane < ((MAX_CODE_POINT + 1) >>> 16); 9403 } 9404 9405 /** 9406 * Determines whether the specified character (Unicode code point) 9407 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 
9408 * Such code points can be represented using a single {@code char}. 9409 * 9410 * @param codePoint the character (Unicode code point) to be tested 9411 * @return {@code true} if the specified code point is between 9412 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 9413 * {@code false} otherwise. 9414 * @since 1.7 9415 */ 9416 public static boolean isBmpCodePoint(int codePoint) { 9417 return codePoint >>> 16 == 0; 9418 // Optimized form of: 9419 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 9420 // We consistently use logical shift (>>>) to facilitate 9421 // additional runtime optimizations. 9422 } 9423 9424 /** 9425 * Determines whether the specified character (Unicode code point) 9426 * is in the <a href="#supplementary">supplementary character</a> range. 9427 * 9428 * @param codePoint the character (Unicode code point) to be tested 9429 * @return {@code true} if the specified code point is between 9430 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 9431 * {@link #MAX_CODE_POINT} inclusive; 9432 * {@code false} otherwise. 9433 * @since 1.5 9434 */ 9435 public static boolean isSupplementaryCodePoint(int codePoint) { 9436 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 9437 && codePoint < MAX_CODE_POINT + 1; 9438 } 9439 9440 /** 9441 * Determines if the given {@code char} value is a 9442 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9443 * Unicode high-surrogate code unit</a> 9444 * (also known as <i>leading-surrogate code unit</i>). 9445 * 9446 * <p>Such values do not represent characters by themselves, 9447 * but are used in the representation of 9448 * <a href="#supplementary">supplementary characters</a> 9449 * in the UTF-16 encoding. 9450 * 9451 * @param ch the {@code char} value to be tested. 9452 * @return {@code true} if the {@code char} value is between 9453 * {@link #MIN_HIGH_SURROGATE} and 9454 * {@link #MAX_HIGH_SURROGATE} inclusive; 9455 * {@code false} otherwise. 
9456 * @see Character#isLowSurrogate(char) 9457 * @see Character.UnicodeBlock#of(int) 9458 * @since 1.5 9459 */ 9460 public static boolean isHighSurrogate(char ch) { 9461 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 9462 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 9463 } 9464 9465 /** 9466 * Determines if the given {@code char} value is a 9467 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9468 * Unicode low-surrogate code unit</a> 9469 * (also known as <i>trailing-surrogate code unit</i>). 9470 * 9471 * <p>Such values do not represent characters by themselves, 9472 * but are used in the representation of 9473 * <a href="#supplementary">supplementary characters</a> 9474 * in the UTF-16 encoding. 9475 * 9476 * @param ch the {@code char} value to be tested. 9477 * @return {@code true} if the {@code char} value is between 9478 * {@link #MIN_LOW_SURROGATE} and 9479 * {@link #MAX_LOW_SURROGATE} inclusive; 9480 * {@code false} otherwise. 9481 * @see Character#isHighSurrogate(char) 9482 * @since 1.5 9483 */ 9484 public static boolean isLowSurrogate(char ch) { 9485 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 9486 } 9487 9488 /** 9489 * Determines if the given {@code char} value is a Unicode 9490 * <i>surrogate code unit</i>. 9491 * 9492 * <p>Such values do not represent characters by themselves, 9493 * but are used in the representation of 9494 * <a href="#supplementary">supplementary characters</a> 9495 * in the UTF-16 encoding. 9496 * 9497 * <p>A char value is a surrogate code unit if and only if it is either 9498 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 9499 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 9500 * 9501 * @param ch the {@code char} value to be tested. 9502 * @return {@code true} if the {@code char} value is between 9503 * {@link #MIN_SURROGATE} and 9504 * {@link #MAX_SURROGATE} inclusive; 9505 * {@code false} otherwise. 
9506 * @since 1.7 9507 */ 9508 public static boolean isSurrogate(char ch) { 9509 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 9510 } 9511 9512 /** 9513 * Determines whether the specified pair of {@code char} 9514 * values is a valid 9515 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9516 * Unicode surrogate pair</a>. 9517 * 9518 * <p>This method is equivalent to the expression: 9519 * <blockquote><pre>{@code 9520 * isHighSurrogate(high) && isLowSurrogate(low) 9521 * }</pre></blockquote> 9522 * 9523 * @param high the high-surrogate code value to be tested 9524 * @param low the low-surrogate code value to be tested 9525 * @return {@code true} if the specified high and 9526 * low-surrogate code values represent a valid surrogate pair; 9527 * {@code false} otherwise. 9528 * @since 1.5 9529 */ 9530 public static boolean isSurrogatePair(char high, char low) { 9531 return isHighSurrogate(high) && isLowSurrogate(low); 9532 } 9533 9534 /** 9535 * Determines the number of {@code char} values needed to 9536 * represent the specified character (Unicode code point). If the 9537 * specified character is equal to or greater than 0x10000, then 9538 * the method returns 2. Otherwise, the method returns 1. 9539 * 9540 * <p>This method doesn't validate the specified character to be a 9541 * valid Unicode code point. The caller must validate the 9542 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 9543 * if necessary. 9544 * 9545 * @param codePoint the character (Unicode code point) to be tested. 9546 * @return 2 if the character is a valid supplementary character; 1 otherwise. 9547 * @see Character#isSupplementaryCodePoint(int) 9548 * @since 1.5 9549 */ 9550 public static int charCount(int codePoint) { 9551 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 9552 } 9553 9554 /** 9555 * Converts the specified surrogate pair to its supplementary code 9556 * point value. 
This method does not validate the specified 9557 * surrogate pair. The caller must validate it using {@link 9558 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 9559 * 9560 * @param high the high-surrogate code unit 9561 * @param low the low-surrogate code unit 9562 * @return the supplementary code point composed from the 9563 * specified surrogate pair. 9564 * @since 1.5 9565 */ 9566 public static int toCodePoint(char high, char low) { 9567 // Optimized form of: 9568 // return ((high - MIN_HIGH_SURROGATE) << 10) 9569 // + (low - MIN_LOW_SURROGATE) 9570 // + MIN_SUPPLEMENTARY_CODE_POINT; 9571 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 9572 - (MIN_HIGH_SURROGATE << 10) 9573 - MIN_LOW_SURROGATE); 9574 } 9575 9576 /** 9577 * Returns the code point at the given index of the 9578 * {@code CharSequence}. If the {@code char} value at 9579 * the given index in the {@code CharSequence} is in the 9580 * high-surrogate range, the following index is less than the 9581 * length of the {@code CharSequence}, and the 9582 * {@code char} value at the following index is in the 9583 * low-surrogate range, then the supplementary code point 9584 * corresponding to this surrogate pair is returned. Otherwise, 9585 * the {@code char} value at the given index is returned. 9586 * 9587 * @param seq a sequence of {@code char} values (Unicode code 9588 * units) 9589 * @param index the index to the {@code char} values (Unicode 9590 * code units) in {@code seq} to be converted 9591 * @return the Unicode code point at the given index 9592 * @throws NullPointerException if {@code seq} is null. 9593 * @throws IndexOutOfBoundsException if the value 9594 * {@code index} is negative or not less than 9595 * {@link CharSequence#length() seq.length()}. 
9596 * @since 1.5 9597 */ 9598 public static int codePointAt(CharSequence seq, int index) { 9599 char c1 = seq.charAt(index); 9600 if (isHighSurrogate(c1) && ++index < seq.length()) { 9601 char c2 = seq.charAt(index); 9602 if (isLowSurrogate(c2)) { 9603 return toCodePoint(c1, c2); 9604 } 9605 } 9606 return c1; 9607 } 9608 9609 /** 9610 * Returns the code point at the given index of the 9611 * {@code char} array. If the {@code char} value at 9612 * the given index in the {@code char} array is in the 9613 * high-surrogate range, the following index is less than the 9614 * length of the {@code char} array, and the 9615 * {@code char} value at the following index is in the 9616 * low-surrogate range, then the supplementary code point 9617 * corresponding to this surrogate pair is returned. Otherwise, 9618 * the {@code char} value at the given index is returned. 9619 * 9620 * @param a the {@code char} array 9621 * @param index the index to the {@code char} values (Unicode 9622 * code units) in the {@code char} array to be converted 9623 * @return the Unicode code point at the given index 9624 * @throws NullPointerException if {@code a} is null. 9625 * @throws IndexOutOfBoundsException if the value 9626 * {@code index} is negative or not less than 9627 * the length of the {@code char} array. 9628 * @since 1.5 9629 */ 9630 public static int codePointAt(char[] a, int index) { 9631 return codePointAtImpl(a, index, a.length); 9632 } 9633 9634 /** 9635 * Returns the code point at the given index of the 9636 * {@code char} array, where only array elements with 9637 * {@code index} less than {@code limit} can be used. If 9638 * the {@code char} value at the given index in the 9639 * {@code char} array is in the high-surrogate range, the 9640 * following index is less than the {@code limit}, and the 9641 * {@code char} value at the following index is in the 9642 * low-surrogate range, then the supplementary code point 9643 * corresponding to this surrogate pair is returned. 
Otherwise, 9644 * the {@code char} value at the given index is returned. 9645 * 9646 * @param a the {@code char} array 9647 * @param index the index to the {@code char} values (Unicode 9648 * code units) in the {@code char} array to be converted 9649 * @param limit the index after the last array element that 9650 * can be used in the {@code char} array 9651 * @return the Unicode code point at the given index 9652 * @throws NullPointerException if {@code a} is null. 9653 * @throws IndexOutOfBoundsException if the {@code index} 9654 * argument is negative or not less than the {@code limit} 9655 * argument, or if the {@code limit} argument is negative or 9656 * greater than the length of the {@code char} array. 9657 * @since 1.5 9658 */ 9659 public static int codePointAt(char[] a, int index, int limit) { 9660 if (index >= limit || index < 0 || limit > a.length) { 9661 throw new IndexOutOfBoundsException(); 9662 } 9663 return codePointAtImpl(a, index, limit); 9664 } 9665 9666 // throws ArrayIndexOutOfBoundsException if index out of bounds 9667 static int codePointAtImpl(char[] a, int index, int limit) { 9668 char c1 = a[index]; 9669 if (isHighSurrogate(c1) && ++index < limit) { 9670 char c2 = a[index]; 9671 if (isLowSurrogate(c2)) { 9672 return toCodePoint(c1, c2); 9673 } 9674 } 9675 return c1; 9676 } 9677 9678 /** 9679 * Returns the code point preceding the given index of the 9680 * {@code CharSequence}. If the {@code char} value at 9681 * {@code (index - 1)} in the {@code CharSequence} is in 9682 * the low-surrogate range, {@code (index - 2)} is not 9683 * negative, and the {@code char} value at {@code (index - 2)} 9684 * in the {@code CharSequence} is in the 9685 * high-surrogate range, then the supplementary code point 9686 * corresponding to this surrogate pair is returned. Otherwise, 9687 * the {@code char} value at {@code (index - 1)} is 9688 * returned. 
9689 * 9690 * @param seq the {@code CharSequence} instance 9691 * @param index the index following the code point that should be returned 9692 * @return the Unicode code point value before the given index. 9693 * @throws NullPointerException if {@code seq} is null. 9694 * @throws IndexOutOfBoundsException if the {@code index} 9695 * argument is less than 1 or greater than {@link 9696 * CharSequence#length() seq.length()}. 9697 * @since 1.5 9698 */ 9699 public static int codePointBefore(CharSequence seq, int index) { 9700 char c2 = seq.charAt(--index); 9701 if (isLowSurrogate(c2) && index > 0) { 9702 char c1 = seq.charAt(--index); 9703 if (isHighSurrogate(c1)) { 9704 return toCodePoint(c1, c2); 9705 } 9706 } 9707 return c2; 9708 } 9709 9710 /** 9711 * Returns the code point preceding the given index of the 9712 * {@code char} array. If the {@code char} value at 9713 * {@code (index - 1)} in the {@code char} array is in 9714 * the low-surrogate range, {@code (index - 2)} is not 9715 * negative, and the {@code char} value at {@code (index - 2)} 9716 * in the {@code char} array is in the 9717 * high-surrogate range, then the supplementary code point 9718 * corresponding to this surrogate pair is returned. Otherwise, 9719 * the {@code char} value at {@code (index - 1)} is 9720 * returned. 9721 * 9722 * @param a the {@code char} array 9723 * @param index the index following the code point that should be returned 9724 * @return the Unicode code point value before the given index. 9725 * @throws NullPointerException if {@code a} is null. 
9726 * @throws IndexOutOfBoundsException if the {@code index} 9727 * argument is less than 1 or greater than the length of the 9728 * {@code char} array 9729 * @since 1.5 9730 */ 9731 public static int codePointBefore(char[] a, int index) { 9732 return codePointBeforeImpl(a, index, 0); 9733 } 9734 9735 /** 9736 * Returns the code point preceding the given index of the 9737 * {@code char} array, where only array elements with 9738 * {@code index} greater than or equal to {@code start} 9739 * can be used. If the {@code char} value at {@code (index - 1)} 9740 * in the {@code char} array is in the 9741 * low-surrogate range, {@code (index - 2)} is not less than 9742 * {@code start}, and the {@code char} value at 9743 * {@code (index - 2)} in the {@code char} array is in 9744 * the high-surrogate range, then the supplementary code point 9745 * corresponding to this surrogate pair is returned. Otherwise, 9746 * the {@code char} value at {@code (index - 1)} is 9747 * returned. 9748 * 9749 * @param a the {@code char} array 9750 * @param index the index following the code point that should be returned 9751 * @param start the index of the first array element in the 9752 * {@code char} array 9753 * @return the Unicode code point value before the given index. 9754 * @throws NullPointerException if {@code a} is null. 9755 * @throws IndexOutOfBoundsException if the {@code index} 9756 * argument is not greater than the {@code start} argument or 9757 * is greater than the length of the {@code char} array, or 9758 * if the {@code start} argument is negative or not less than 9759 * the length of the {@code char} array. 
9760 * @since 1.5 9761 */ 9762 public static int codePointBefore(char[] a, int index, int start) { 9763 if (index <= start || start < 0 || index > a.length) { 9764 throw new IndexOutOfBoundsException(); 9765 } 9766 return codePointBeforeImpl(a, index, start); 9767 } 9768 9769 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds 9770 static int codePointBeforeImpl(char[] a, int index, int start) { 9771 char c2 = a[--index]; 9772 if (isLowSurrogate(c2) && index > start) { 9773 char c1 = a[--index]; 9774 if (isHighSurrogate(c1)) { 9775 return toCodePoint(c1, c2); 9776 } 9777 } 9778 return c2; 9779 } 9780 9781 /** 9782 * Returns the leading surrogate (a 9783 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9784 * high surrogate code unit</a>) of the 9785 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9786 * surrogate pair</a> 9787 * representing the specified supplementary character (Unicode 9788 * code point) in the UTF-16 encoding. If the specified character 9789 * is not a 9790 * <a href="Character.html#supplementary">supplementary character</a>, 9791 * an unspecified {@code char} is returned. 9792 * 9793 * <p>If 9794 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9795 * is {@code true}, then 9796 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 9797 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 9798 * are also always {@code true}. 
9799 * 9800 * @param codePoint a supplementary character (Unicode code point) 9801 * @return the leading surrogate code unit used to represent the 9802 * character in the UTF-16 encoding 9803 * @since 1.7 9804 */ 9805 public static char highSurrogate(int codePoint) { 9806 return (char) ((codePoint >>> 10) 9807 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 9808 } 9809 9810 /** 9811 * Returns the trailing surrogate (a 9812 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9813 * low surrogate code unit</a>) of the 9814 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9815 * surrogate pair</a> 9816 * representing the specified supplementary character (Unicode 9817 * code point) in the UTF-16 encoding. If the specified character 9818 * is not a 9819 * <a href="Character.html#supplementary">supplementary character</a>, 9820 * an unspecified {@code char} is returned. 9821 * 9822 * <p>If 9823 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9824 * is {@code true}, then 9825 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 9826 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 9827 * are also always {@code true}. 9828 * 9829 * @param codePoint a supplementary character (Unicode code point) 9830 * @return the trailing surrogate code unit used to represent the 9831 * character in the UTF-16 encoding 9832 * @since 1.7 9833 */ 9834 public static char lowSurrogate(int codePoint) { 9835 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 9836 } 9837 9838 /** 9839 * Converts the specified character (Unicode code point) to its 9840 * UTF-16 representation. If the specified code point is a BMP 9841 * (Basic Multilingual Plane or Plane 0) value, the same value is 9842 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 9843 * specified code point is a supplementary character, its 9844 * surrogate values are stored in {@code dst[dstIndex]} 9845 * (high-surrogate) and {@code dst[dstIndex+1]} 9846 * (low-surrogate), and 2 is returned. 9847 * 9848 * @param codePoint the character (Unicode code point) to be converted. 9849 * @param dst an array of {@code char} in which the 9850 * {@code codePoint}'s UTF-16 value is stored. 9851 * @param dstIndex the start index into the {@code dst} 9852 * array where the converted value is stored. 9853 * @return 1 if the code point is a BMP code point, 2 if the 9854 * code point is a supplementary code point. 9855 * @throws IllegalArgumentException if the specified 9856 * {@code codePoint} is not a valid Unicode code point. 9857 * @throws NullPointerException if the specified {@code dst} is null. 9858 * @throws IndexOutOfBoundsException if {@code dstIndex} 9859 * is negative or not less than {@code dst.length}, or if 9860 * {@code dst} at {@code dstIndex} doesn't have enough 9861 * array element(s) to store the resulting {@code char} 9862 * value(s). (If {@code dstIndex} is equal to 9863 * {@code dst.length-1} and the specified 9864 * {@code codePoint} is a supplementary character, the 9865 * high-surrogate value is not stored in 9866 * {@code dst[dstIndex]}.) 9867 * @since 1.5 9868 */ 9869 public static int toChars(int codePoint, char[] dst, int dstIndex) { 9870 if (isBmpCodePoint(codePoint)) { 9871 dst[dstIndex] = (char) codePoint; 9872 return 1; 9873 } else if (isValidCodePoint(codePoint)) { 9874 toSurrogates(codePoint, dst, dstIndex); 9875 return 2; 9876 } else { 9877 throw new IllegalArgumentException( 9878 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9879 } 9880 } 9881 9882 /** 9883 * Converts the specified character (Unicode code point) to its 9884 * UTF-16 representation stored in a {@code char} array. 
If 9885 * the specified code point is a BMP (Basic Multilingual Plane or 9886 * Plane 0) value, the resulting {@code char} array has 9887 * the same value as {@code codePoint}. If the specified code 9888 * point is a supplementary code point, the resulting 9889 * {@code char} array has the corresponding surrogate pair. 9890 * 9891 * @param codePoint a Unicode code point 9892 * @return a {@code char} array having 9893 * {@code codePoint}'s UTF-16 representation. 9894 * @throws IllegalArgumentException if the specified 9895 * {@code codePoint} is not a valid Unicode code point. 9896 * @since 1.5 9897 */ 9898 public static char[] toChars(int codePoint) { 9899 if (isBmpCodePoint(codePoint)) { 9900 return new char[] { (char) codePoint }; 9901 } else if (isValidCodePoint(codePoint)) { 9902 char[] result = new char[2]; 9903 toSurrogates(codePoint, result, 0); 9904 return result; 9905 } else { 9906 throw new IllegalArgumentException( 9907 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9908 } 9909 } 9910 9911 static void toSurrogates(int codePoint, char[] dst, int index) { 9912 // We write elements "backwards" to guarantee all-or-nothing 9913 dst[index+1] = lowSurrogate(codePoint); 9914 dst[index] = highSurrogate(codePoint); 9915 } 9916 9917 /** 9918 * Returns the number of Unicode code points in the text range of 9919 * the specified char sequence. The text range begins at the 9920 * specified {@code beginIndex} and extends to the 9921 * {@code char} at index {@code endIndex - 1}. Thus the 9922 * length (in {@code char}s) of the text range is 9923 * {@code endIndex-beginIndex}. Unpaired surrogates within 9924 * the text range count as one code point each. 9925 * 9926 * @param seq the char sequence 9927 * @param beginIndex the index to the first {@code char} of 9928 * the text range. 9929 * @param endIndex the index after the last {@code char} of 9930 * the text range. 
9931 * @return the number of Unicode code points in the specified text 9932 * range 9933 * @throws NullPointerException if {@code seq} is null. 9934 * @throws IndexOutOfBoundsException if the 9935 * {@code beginIndex} is negative, or {@code endIndex} 9936 * is larger than the length of the given sequence, or 9937 * {@code beginIndex} is larger than {@code endIndex}. 9938 * @since 1.5 9939 */ 9940 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 9941 Objects.checkFromToIndex(beginIndex, endIndex, seq.length()); 9942 int n = endIndex - beginIndex; 9943 for (int i = beginIndex; i < endIndex; ) { 9944 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 9945 isLowSurrogate(seq.charAt(i))) { 9946 n--; 9947 i++; 9948 } 9949 } 9950 return n; 9951 } 9952 9953 /** 9954 * Returns the number of Unicode code points in a subarray of the 9955 * {@code char} array argument. The {@code offset} 9956 * argument is the index of the first {@code char} of the 9957 * subarray and the {@code count} argument specifies the 9958 * length of the subarray in {@code char}s. Unpaired 9959 * surrogates within the subarray count as one code point each. 9960 * 9961 * @param a the {@code char} array 9962 * @param offset the index of the first {@code char} in the 9963 * given {@code char} array 9964 * @param count the length of the subarray in {@code char}s 9965 * @return the number of Unicode code points in the specified subarray 9966 * @throws NullPointerException if {@code a} is null. 9967 * @throws IndexOutOfBoundsException if {@code offset} or 9968 * {@code count} is negative, or if {@code offset + 9969 * count} is larger than the length of the given array. 
9970 * @since 1.5 9971 */ 9972 public static int codePointCount(char[] a, int offset, int count) { 9973 Objects.checkFromIndexSize(offset, count, a.length); 9974 return codePointCountImpl(a, offset, count); 9975 } 9976 9977 static int codePointCountImpl(char[] a, int offset, int count) { 9978 int endIndex = offset + count; 9979 int n = count; 9980 for (int i = offset; i < endIndex; ) { 9981 if (isHighSurrogate(a[i++]) && i < endIndex && 9982 isLowSurrogate(a[i])) { 9983 n--; 9984 i++; 9985 } 9986 } 9987 return n; 9988 } 9989 9990 /** 9991 * Returns the index within the given char sequence that is offset 9992 * from the given {@code index} by {@code codePointOffset} 9993 * code points. Unpaired surrogates within the text range given by 9994 * {@code index} and {@code codePointOffset} count as 9995 * one code point each. 9996 * 9997 * @param seq the char sequence 9998 * @param index the index to be offset 9999 * @param codePointOffset the offset in code points 10000 * @return the index within the char sequence 10001 * @throws NullPointerException if {@code seq} is null. 10002 * @throws IndexOutOfBoundsException if {@code index} 10003 * is negative or larger than the length of the char sequence, 10004 * or if {@code codePointOffset} is positive and the 10005 * subsequence starting with {@code index} has fewer than 10006 * {@code codePointOffset} code points, or if 10007 * {@code codePointOffset} is negative and the subsequence 10008 * before {@code index} has fewer than the absolute value 10009 * of {@code codePointOffset} code points. 
10010 * @since 1.5 10011 */ 10012 public static int offsetByCodePoints(CharSequence seq, int index, 10013 int codePointOffset) { 10014 int length = seq.length(); 10015 if (index < 0 || index > length) { 10016 throw new IndexOutOfBoundsException(); 10017 } 10018 10019 int x = index; 10020 if (codePointOffset >= 0) { 10021 int i; 10022 for (i = 0; x < length && i < codePointOffset; i++) { 10023 if (isHighSurrogate(seq.charAt(x++)) && x < length && 10024 isLowSurrogate(seq.charAt(x))) { 10025 x++; 10026 } 10027 } 10028 if (i < codePointOffset) { 10029 throw new IndexOutOfBoundsException(); 10030 } 10031 } else { 10032 int i; 10033 for (i = codePointOffset; x > 0 && i < 0; i++) { 10034 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 10035 isHighSurrogate(seq.charAt(x-1))) { 10036 x--; 10037 } 10038 } 10039 if (i < 0) { 10040 throw new IndexOutOfBoundsException(); 10041 } 10042 } 10043 return x; 10044 } 10045 10046 /** 10047 * Returns the index within the given {@code char} subarray 10048 * that is offset from the given {@code index} by 10049 * {@code codePointOffset} code points. The 10050 * {@code start} and {@code count} arguments specify a 10051 * subarray of the {@code char} array. Unpaired surrogates 10052 * within the text range given by {@code index} and 10053 * {@code codePointOffset} count as one code point each. 10054 * 10055 * @param a the {@code char} array 10056 * @param start the index of the first {@code char} of the 10057 * subarray 10058 * @param count the length of the subarray in {@code char}s 10059 * @param index the index to be offset 10060 * @param codePointOffset the offset in code points 10061 * @return the index within the subarray 10062 * @throws NullPointerException if {@code a} is null. 
10063 * @throws IndexOutOfBoundsException 10064 * if {@code start} or {@code count} is negative, 10065 * or if {@code start + count} is larger than the length of 10066 * the given array, 10067 * or if {@code index} is less than {@code start} or 10068 * larger then {@code start + count}, 10069 * or if {@code codePointOffset} is positive and the text range 10070 * starting with {@code index} and ending with {@code start + count - 1} 10071 * has fewer than {@code codePointOffset} code 10072 * points, 10073 * or if {@code codePointOffset} is negative and the text range 10074 * starting with {@code start} and ending with {@code index - 1} 10075 * has fewer than the absolute value of 10076 * {@code codePointOffset} code points. 10077 * @since 1.5 10078 */ 10079 public static int offsetByCodePoints(char[] a, int start, int count, 10080 int index, int codePointOffset) { 10081 if (count > a.length-start || start < 0 || count < 0 10082 || index < start || index > start+count) { 10083 throw new IndexOutOfBoundsException(); 10084 } 10085 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 10086 } 10087 10088 static int offsetByCodePointsImpl(char[]a, int start, int count, 10089 int index, int codePointOffset) { 10090 int x = index; 10091 if (codePointOffset >= 0) { 10092 int limit = start + count; 10093 int i; 10094 for (i = 0; x < limit && i < codePointOffset; i++) { 10095 if (isHighSurrogate(a[x++]) && x < limit && 10096 isLowSurrogate(a[x])) { 10097 x++; 10098 } 10099 } 10100 if (i < codePointOffset) { 10101 throw new IndexOutOfBoundsException(); 10102 } 10103 } else { 10104 int i; 10105 for (i = codePointOffset; x > start && i < 0; i++) { 10106 if (isLowSurrogate(a[--x]) && x > start && 10107 isHighSurrogate(a[x-1])) { 10108 x--; 10109 } 10110 } 10111 if (i < 0) { 10112 throw new IndexOutOfBoundsException(); 10113 } 10114 } 10115 return x; 10116 } 10117 10118 /** 10119 * Determines if the specified character is a lowercase character. 
10120 * <p> 10121 * A character is lowercase if its general category type, provided 10122 * by {@code Character.getType(ch)}, is 10123 * {@code LOWERCASE_LETTER}, or it has contributory property 10124 * Other_Lowercase as defined by the Unicode Standard. 10125 * <p> 10126 * The following are examples of lowercase characters: 10127 * <blockquote><pre> 10128 * a b c d e f g h i j k l m n o p q r s t u v w x y z 10129 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 10130 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 10131 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 10132 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 10133 * </pre></blockquote> 10134 * <p> Many other Unicode characters are lowercase too. 10135 * 10136 * <p><b>Note:</b> This method cannot handle <a 10137 * href="#supplementary"> supplementary characters</a>. To support 10138 * all Unicode characters, including supplementary characters, use 10139 * the {@link #isLowerCase(int)} method. 10140 * 10141 * @param ch the character to be tested. 10142 * @return {@code true} if the character is lowercase; 10143 * {@code false} otherwise. 10144 * @see Character#isLowerCase(char) 10145 * @see Character#isTitleCase(char) 10146 * @see Character#toLowerCase(char) 10147 * @see Character#getType(char) 10148 */ 10149 public static boolean isLowerCase(char ch) { 10150 return isLowerCase((int)ch); 10151 } 10152 10153 /** 10154 * Determines if the specified character (Unicode code point) is a 10155 * lowercase character. 10156 * <p> 10157 * A character is lowercase if its general category type, provided 10158 * by {@link Character#getType getType(codePoint)}, is 10159 * {@code LOWERCASE_LETTER}, or it has contributory property 10160 * Other_Lowercase as defined by the Unicode Standard. 
10161 * <p> 10162 * The following are examples of lowercase characters: 10163 * <blockquote><pre> 10164 * a b c d e f g h i j k l m n o p q r s t u v w x y z 10165 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 10166 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 10167 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 10168 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 10169 * </pre></blockquote> 10170 * <p> Many other Unicode characters are lowercase too. 10171 * 10172 * @param codePoint the character (Unicode code point) to be tested. 10173 * @return {@code true} if the character is lowercase; 10174 * {@code false} otherwise. 10175 * @see Character#isLowerCase(int) 10176 * @see Character#isTitleCase(int) 10177 * @see Character#toLowerCase(int) 10178 * @see Character#getType(int) 10179 * @since 1.5 10180 */ 10181 public static boolean isLowerCase(int codePoint) { 10182 return CharacterData.of(codePoint).isLowerCase(codePoint); 10183 } 10184 10185 /** 10186 * Determines if the specified character is an uppercase character. 10187 * <p> 10188 * A character is uppercase if its general category type, provided by 10189 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 10190 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 10191 * <p> 10192 * The following are examples of uppercase characters: 10193 * <blockquote><pre> 10194 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 10195 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 10196 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 10197 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 10198 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 10199 * </pre></blockquote> 10200 * <p> Many other Unicode characters are uppercase too. 
10201 * 10202 * <p><b>Note:</b> This method cannot handle <a 10203 * href="#supplementary"> supplementary characters</a>. To support 10204 * all Unicode characters, including supplementary characters, use 10205 * the {@link #isUpperCase(int)} method. 10206 * 10207 * @param ch the character to be tested. 10208 * @return {@code true} if the character is uppercase; 10209 * {@code false} otherwise. 10210 * @see Character#isLowerCase(char) 10211 * @see Character#isTitleCase(char) 10212 * @see Character#toUpperCase(char) 10213 * @see Character#getType(char) 10214 * @since 1.0 10215 */ 10216 public static boolean isUpperCase(char ch) { 10217 return isUpperCase((int)ch); 10218 } 10219 10220 /** 10221 * Determines if the specified character (Unicode code point) is an uppercase character. 10222 * <p> 10223 * A character is uppercase if its general category type, provided by 10224 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 10225 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 10226 * <p> 10227 * The following are examples of uppercase characters: 10228 * <blockquote><pre> 10229 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 10230 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 10231 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 10232 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 10233 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 10234 * </pre></blockquote> 10235 * <p> Many other Unicode characters are uppercase too. 10236 * 10237 * @param codePoint the character (Unicode code point) to be tested. 10238 * @return {@code true} if the character is uppercase; 10239 * {@code false} otherwise. 
10240 * @see Character#isLowerCase(int) 10241 * @see Character#isTitleCase(int) 10242 * @see Character#toUpperCase(int) 10243 * @see Character#getType(int) 10244 * @since 1.5 10245 */ 10246 public static boolean isUpperCase(int codePoint) { 10247 return CharacterData.of(codePoint).isUpperCase(codePoint); 10248 } 10249 10250 /** 10251 * Determines if the specified character is a titlecase character. 10252 * <p> 10253 * A character is a titlecase character if its general 10254 * category type, provided by {@code Character.getType(ch)}, 10255 * is {@code TITLECASE_LETTER}. 10256 * <p> 10257 * Some characters look like pairs of Latin letters. For example, there 10258 * is an uppercase letter that looks like "LJ" and has a corresponding 10259 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 10260 * is the appropriate form to use when rendering a word in lowercase 10261 * with initial capitals, as for a book title. 10262 * <p> 10263 * These are some of the Unicode characters for which this method returns 10264 * {@code true}: 10265 * <ul> 10266 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10267 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10268 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10269 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10270 * </ul> 10271 * <p> Many other Unicode characters are titlecase too. 10272 * 10273 * <p><b>Note:</b> This method cannot handle <a 10274 * href="#supplementary"> supplementary characters</a>. To support 10275 * all Unicode characters, including supplementary characters, use 10276 * the {@link #isTitleCase(int)} method. 10277 * 10278 * @param ch the character to be tested. 10279 * @return {@code true} if the character is titlecase; 10280 * {@code false} otherwise. 
10281 * @see Character#isLowerCase(char) 10282 * @see Character#isUpperCase(char) 10283 * @see Character#toTitleCase(char) 10284 * @see Character#getType(char) 10285 * @since 1.0.2 10286 */ 10287 public static boolean isTitleCase(char ch) { 10288 return isTitleCase((int)ch); 10289 } 10290 10291 /** 10292 * Determines if the specified character (Unicode code point) is a titlecase character. 10293 * <p> 10294 * A character is a titlecase character if its general 10295 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10296 * is {@code TITLECASE_LETTER}. 10297 * <p> 10298 * Some characters look like pairs of Latin letters. For example, there 10299 * is an uppercase letter that looks like "LJ" and has a corresponding 10300 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 10301 * is the appropriate form to use when rendering a word in lowercase 10302 * with initial capitals, as for a book title. 10303 * <p> 10304 * These are some of the Unicode characters for which this method returns 10305 * {@code true}: 10306 * <ul> 10307 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10308 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10309 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10310 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10311 * </ul> 10312 * <p> Many other Unicode characters are titlecase too. 10313 * 10314 * @param codePoint the character (Unicode code point) to be tested. 10315 * @return {@code true} if the character is titlecase; 10316 * {@code false} otherwise. 10317 * @see Character#isLowerCase(int) 10318 * @see Character#isUpperCase(int) 10319 * @see Character#toTitleCase(int) 10320 * @see Character#getType(int) 10321 * @since 1.5 10322 */ 10323 public static boolean isTitleCase(int codePoint) { 10324 return getType(codePoint) == Character.TITLECASE_LETTER; 10325 } 10326 10327 /** 10328 * Determines if the specified character is a digit. 
10329 * <p> 10330 * A character is a digit if its general category type, provided 10331 * by {@code Character.getType(ch)}, is 10332 * {@code DECIMAL_DIGIT_NUMBER}. 10333 * <p> 10334 * Some Unicode character ranges that contain digits: 10335 * <ul> 10336 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10337 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10338 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10339 * Arabic-Indic digits 10340 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10341 * Extended Arabic-Indic digits 10342 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10343 * Devanagari digits 10344 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10345 * Fullwidth digits 10346 * </ul> 10347 * 10348 * Many other character ranges contain digits as well. 10349 * 10350 * <p><b>Note:</b> This method cannot handle <a 10351 * href="#supplementary"> supplementary characters</a>. To support 10352 * all Unicode characters, including supplementary characters, use 10353 * the {@link #isDigit(int)} method. 10354 * 10355 * @param ch the character to be tested. 10356 * @return {@code true} if the character is a digit; 10357 * {@code false} otherwise. 10358 * @see Character#digit(char, int) 10359 * @see Character#forDigit(int, int) 10360 * @see Character#getType(char) 10361 */ 10362 public static boolean isDigit(char ch) { 10363 return isDigit((int)ch); 10364 } 10365 10366 /** 10367 * Determines if the specified character (Unicode code point) is a digit. 10368 * <p> 10369 * A character is a digit if its general category type, provided 10370 * by {@link Character#getType(int) getType(codePoint)}, is 10371 * {@code DECIMAL_DIGIT_NUMBER}. 
10372 * <p> 10373 * Some Unicode character ranges that contain digits: 10374 * <ul> 10375 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10376 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10377 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10378 * Arabic-Indic digits 10379 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10380 * Extended Arabic-Indic digits 10381 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10382 * Devanagari digits 10383 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10384 * Fullwidth digits 10385 * </ul> 10386 * 10387 * Many other character ranges contain digits as well. 10388 * 10389 * @param codePoint the character (Unicode code point) to be tested. 10390 * @return {@code true} if the character is a digit; 10391 * {@code false} otherwise. 10392 * @see Character#forDigit(int, int) 10393 * @see Character#getType(int) 10394 * @since 1.5 10395 */ 10396 public static boolean isDigit(int codePoint) { 10397 return CharacterData.of(codePoint).isDigit(codePoint); 10398 } 10399 10400 /** 10401 * Determines if a character is defined in Unicode. 10402 * <p> 10403 * A character is defined if at least one of the following is true: 10404 * <ul> 10405 * <li>It has an entry in the UnicodeData file. 10406 * <li>It has a value in a range defined by the UnicodeData file. 10407 * </ul> 10408 * 10409 * <p><b>Note:</b> This method cannot handle <a 10410 * href="#supplementary"> supplementary characters</a>. To support 10411 * all Unicode characters, including supplementary characters, use 10412 * the {@link #isDefined(int)} method. 10413 * 10414 * @param ch the character to be tested 10415 * @return {@code true} if the character has a defined meaning 10416 * in Unicode; {@code false} otherwise. 
10417 * @see Character#isDigit(char) 10418 * @see Character#isLetter(char) 10419 * @see Character#isLetterOrDigit(char) 10420 * @see Character#isLowerCase(char) 10421 * @see Character#isTitleCase(char) 10422 * @see Character#isUpperCase(char) 10423 * @since 1.0.2 10424 */ 10425 public static boolean isDefined(char ch) { 10426 return isDefined((int)ch); 10427 } 10428 10429 /** 10430 * Determines if a character (Unicode code point) is defined in Unicode. 10431 * <p> 10432 * A character is defined if at least one of the following is true: 10433 * <ul> 10434 * <li>It has an entry in the UnicodeData file. 10435 * <li>It has a value in a range defined by the UnicodeData file. 10436 * </ul> 10437 * 10438 * @param codePoint the character (Unicode code point) to be tested. 10439 * @return {@code true} if the character has a defined meaning 10440 * in Unicode; {@code false} otherwise. 10441 * @see Character#isDigit(int) 10442 * @see Character#isLetter(int) 10443 * @see Character#isLetterOrDigit(int) 10444 * @see Character#isLowerCase(int) 10445 * @see Character#isTitleCase(int) 10446 * @see Character#isUpperCase(int) 10447 * @since 1.5 10448 */ 10449 public static boolean isDefined(int codePoint) { 10450 return getType(codePoint) != Character.UNASSIGNED; 10451 } 10452 10453 /** 10454 * Determines if the specified character is a letter. 10455 * <p> 10456 * A character is considered to be a letter if its general 10457 * category type, provided by {@code Character.getType(ch)}, 10458 * is any of the following: 10459 * <ul> 10460 * <li> {@code UPPERCASE_LETTER} 10461 * <li> {@code LOWERCASE_LETTER} 10462 * <li> {@code TITLECASE_LETTER} 10463 * <li> {@code MODIFIER_LETTER} 10464 * <li> {@code OTHER_LETTER} 10465 * </ul> 10466 * 10467 * Not all letters have case. Many characters are 10468 * letters but are neither uppercase nor lowercase nor titlecase. 10469 * 10470 * <p><b>Note:</b> This method cannot handle <a 10471 * href="#supplementary"> supplementary characters</a>. 
To support 10472 * all Unicode characters, including supplementary characters, use 10473 * the {@link #isLetter(int)} method. 10474 * 10475 * @param ch the character to be tested. 10476 * @return {@code true} if the character is a letter; 10477 * {@code false} otherwise. 10478 * @see Character#isDigit(char) 10479 * @see Character#isJavaIdentifierStart(char) 10480 * @see Character#isJavaLetter(char) 10481 * @see Character#isJavaLetterOrDigit(char) 10482 * @see Character#isLetterOrDigit(char) 10483 * @see Character#isLowerCase(char) 10484 * @see Character#isTitleCase(char) 10485 * @see Character#isUnicodeIdentifierStart(char) 10486 * @see Character#isUpperCase(char) 10487 */ 10488 public static boolean isLetter(char ch) { 10489 return isLetter((int)ch); 10490 } 10491 10492 /** 10493 * Determines if the specified character (Unicode code point) is a letter. 10494 * <p> 10495 * A character is considered to be a letter if its general 10496 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10497 * is any of the following: 10498 * <ul> 10499 * <li> {@code UPPERCASE_LETTER} 10500 * <li> {@code LOWERCASE_LETTER} 10501 * <li> {@code TITLECASE_LETTER} 10502 * <li> {@code MODIFIER_LETTER} 10503 * <li> {@code OTHER_LETTER} 10504 * </ul> 10505 * 10506 * Not all letters have case. Many characters are 10507 * letters but are neither uppercase nor lowercase nor titlecase. 10508 * 10509 * @param codePoint the character (Unicode code point) to be tested. 10510 * @return {@code true} if the character is a letter; 10511 * {@code false} otherwise. 
10512 * @see Character#isDigit(int) 10513 * @see Character#isJavaIdentifierStart(int) 10514 * @see Character#isLetterOrDigit(int) 10515 * @see Character#isLowerCase(int) 10516 * @see Character#isTitleCase(int) 10517 * @see Character#isUnicodeIdentifierStart(int) 10518 * @see Character#isUpperCase(int) 10519 * @since 1.5 10520 */ 10521 public static boolean isLetter(int codePoint) { 10522 return ((((1 << Character.UPPERCASE_LETTER) | 10523 (1 << Character.LOWERCASE_LETTER) | 10524 (1 << Character.TITLECASE_LETTER) | 10525 (1 << Character.MODIFIER_LETTER) | 10526 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 10527 != 0; 10528 } 10529 10530 /** 10531 * Determines if the specified character is a letter or digit. 10532 * <p> 10533 * A character is considered to be a letter or digit if either 10534 * {@code Character.isLetter(char ch)} or 10535 * {@code Character.isDigit(char ch)} returns 10536 * {@code true} for the character. 10537 * 10538 * <p><b>Note:</b> This method cannot handle <a 10539 * href="#supplementary"> supplementary characters</a>. To support 10540 * all Unicode characters, including supplementary characters, use 10541 * the {@link #isLetterOrDigit(int)} method. 10542 * 10543 * @param ch the character to be tested. 10544 * @return {@code true} if the character is a letter or digit; 10545 * {@code false} otherwise. 10546 * @see Character#isDigit(char) 10547 * @see Character#isJavaIdentifierPart(char) 10548 * @see Character#isJavaLetter(char) 10549 * @see Character#isJavaLetterOrDigit(char) 10550 * @see Character#isLetter(char) 10551 * @see Character#isUnicodeIdentifierPart(char) 10552 * @since 1.0.2 10553 */ 10554 public static boolean isLetterOrDigit(char ch) { 10555 return isLetterOrDigit((int)ch); 10556 } 10557 10558 /** 10559 * Determines if the specified character (Unicode code point) is a letter or digit. 
10560 * <p> 10561 * A character is considered to be a letter or digit if either 10562 * {@link #isLetter(int) isLetter(codePoint)} or 10563 * {@link #isDigit(int) isDigit(codePoint)} returns 10564 * {@code true} for the character. 10565 * 10566 * @param codePoint the character (Unicode code point) to be tested. 10567 * @return {@code true} if the character is a letter or digit; 10568 * {@code false} otherwise. 10569 * @see Character#isDigit(int) 10570 * @see Character#isJavaIdentifierPart(int) 10571 * @see Character#isLetter(int) 10572 * @see Character#isUnicodeIdentifierPart(int) 10573 * @since 1.5 10574 */ 10575 public static boolean isLetterOrDigit(int codePoint) { 10576 return ((((1 << Character.UPPERCASE_LETTER) | 10577 (1 << Character.LOWERCASE_LETTER) | 10578 (1 << Character.TITLECASE_LETTER) | 10579 (1 << Character.MODIFIER_LETTER) | 10580 (1 << Character.OTHER_LETTER) | 10581 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 10582 != 0; 10583 } 10584 10585 /** 10586 * Determines if the specified character is permissible as the first 10587 * character in a Java identifier. 10588 * <p> 10589 * A character may start a Java identifier if and only if 10590 * one of the following conditions is true: 10591 * <ul> 10592 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10593 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10594 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10595 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10596 * </ul> 10597 * 10598 * @param ch the character to be tested. 10599 * @return {@code true} if the character may start a Java 10600 * identifier; {@code false} otherwise. 
10601 * @see Character#isJavaLetterOrDigit(char) 10602 * @see Character#isJavaIdentifierStart(char) 10603 * @see Character#isJavaIdentifierPart(char) 10604 * @see Character#isLetter(char) 10605 * @see Character#isLetterOrDigit(char) 10606 * @see Character#isUnicodeIdentifierStart(char) 10607 * @since 1.0.2 10608 * @deprecated Replaced by isJavaIdentifierStart(char). 10609 */ 10610 @Deprecated(since="1.1") 10611 public static boolean isJavaLetter(char ch) { 10612 return isJavaIdentifierStart(ch); 10613 } 10614 10615 /** 10616 * Determines if the specified character may be part of a Java 10617 * identifier as other than the first character. 10618 * <p> 10619 * A character may be part of a Java identifier if and only if one 10620 * of the following conditions is true: 10621 * <ul> 10622 * <li> it is a letter 10623 * <li> it is a currency symbol (such as {@code '$'}) 10624 * <li> it is a connecting punctuation character (such as {@code '_'}) 10625 * <li> it is a digit 10626 * <li> it is a numeric letter (such as a Roman numeral character) 10627 * <li> it is a combining mark 10628 * <li> it is a non-spacing mark 10629 * <li> {@code isIdentifierIgnorable} returns 10630 * {@code true} for the character. 10631 * </ul> 10632 * 10633 * @param ch the character to be tested. 10634 * @return {@code true} if the character may be part of a 10635 * Java identifier; {@code false} otherwise. 10636 * @see Character#isJavaLetter(char) 10637 * @see Character#isJavaIdentifierStart(char) 10638 * @see Character#isJavaIdentifierPart(char) 10639 * @see Character#isLetter(char) 10640 * @see Character#isLetterOrDigit(char) 10641 * @see Character#isUnicodeIdentifierPart(char) 10642 * @see Character#isIdentifierIgnorable(char) 10643 * @since 1.0.2 10644 * @deprecated Replaced by isJavaIdentifierPart(char). 
10645 */ 10646 @Deprecated(since="1.1") 10647 public static boolean isJavaLetterOrDigit(char ch) { 10648 return isJavaIdentifierPart(ch); 10649 } 10650 10651 /** 10652 * Determines if the specified character (Unicode code point) is alphabetic. 10653 * <p> 10654 * A character is considered to be alphabetic if its general category type, 10655 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 10656 * the following: 10657 * <ul> 10658 * <li> {@code UPPERCASE_LETTER} 10659 * <li> {@code LOWERCASE_LETTER} 10660 * <li> {@code TITLECASE_LETTER} 10661 * <li> {@code MODIFIER_LETTER} 10662 * <li> {@code OTHER_LETTER} 10663 * <li> {@code LETTER_NUMBER} 10664 * </ul> 10665 * or it has contributory property Other_Alphabetic as defined by the 10666 * Unicode Standard. 10667 * 10668 * @param codePoint the character (Unicode code point) to be tested. 10669 * @return {@code true} if the character is a Unicode alphabet 10670 * character, {@code false} otherwise. 10671 * @since 1.7 10672 */ 10673 public static boolean isAlphabetic(int codePoint) { 10674 return (((((1 << Character.UPPERCASE_LETTER) | 10675 (1 << Character.LOWERCASE_LETTER) | 10676 (1 << Character.TITLECASE_LETTER) | 10677 (1 << Character.MODIFIER_LETTER) | 10678 (1 << Character.OTHER_LETTER) | 10679 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 10680 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 10681 } 10682 10683 /** 10684 * Determines if the specified character (Unicode code point) is a CJKV 10685 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 10686 * the Unicode Standard. 10687 * 10688 * @param codePoint the character (Unicode code point) to be tested. 10689 * @return {@code true} if the character is a Unicode ideograph 10690 * character, {@code false} otherwise. 
10691 * @since 1.7 10692 */ 10693 public static boolean isIdeographic(int codePoint) { 10694 return CharacterData.of(codePoint).isIdeographic(codePoint); 10695 } 10696 10697 /** 10698 * Determines if the specified character is 10699 * permissible as the first character in a Java identifier. 10700 * <p> 10701 * A character may start a Java identifier if and only if 10702 * one of the following conditions is true: 10703 * <ul> 10704 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10705 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10706 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10707 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10708 * </ul> 10709 * 10710 * <p><b>Note:</b> This method cannot handle <a 10711 * href="#supplementary"> supplementary characters</a>. To support 10712 * all Unicode characters, including supplementary characters, use 10713 * the {@link #isJavaIdentifierStart(int)} method. 10714 * 10715 * @param ch the character to be tested. 10716 * @return {@code true} if the character may start a Java identifier; 10717 * {@code false} otherwise. 10718 * @see Character#isJavaIdentifierPart(char) 10719 * @see Character#isLetter(char) 10720 * @see Character#isUnicodeIdentifierStart(char) 10721 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10722 * @since 1.1 10723 */ 10724 @SuppressWarnings("doclint:reference") // cross-module links 10725 public static boolean isJavaIdentifierStart(char ch) { 10726 return isJavaIdentifierStart((int)ch); 10727 } 10728 10729 /** 10730 * Determines if the character (Unicode code point) is 10731 * permissible as the first character in a Java identifier. 
10732 * <p> 10733 * A character may start a Java identifier if and only if 10734 * one of the following conditions is true: 10735 * <ul> 10736 * <li> {@link #isLetter(int) isLetter(codePoint)} 10737 * returns {@code true} 10738 * <li> {@link #getType(int) getType(codePoint)} 10739 * returns {@code LETTER_NUMBER} 10740 * <li> the referenced character is a currency symbol (such as {@code '$'}) 10741 * <li> the referenced character is a connecting punctuation character 10742 * (such as {@code '_'}). 10743 * </ul> 10744 * 10745 * @param codePoint the character (Unicode code point) to be tested. 10746 * @return {@code true} if the character may start a Java identifier; 10747 * {@code false} otherwise. 10748 * @see Character#isJavaIdentifierPart(int) 10749 * @see Character#isLetter(int) 10750 * @see Character#isUnicodeIdentifierStart(int) 10751 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10752 * @since 1.5 10753 */ 10754 @SuppressWarnings("doclint:reference") // cross-module links 10755 public static boolean isJavaIdentifierStart(int codePoint) { 10756 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 10757 } 10758 10759 /** 10760 * Determines if the specified character may be part of a Java 10761 * identifier as other than the first character. 
10762 * <p> 10763 * A character may be part of a Java identifier if any of the following 10764 * conditions are true: 10765 * <ul> 10766 * <li> it is a letter 10767 * <li> it is a currency symbol (such as {@code '$'}) 10768 * <li> it is a connecting punctuation character (such as {@code '_'}) 10769 * <li> it is a digit 10770 * <li> it is a numeric letter (such as a Roman numeral character) 10771 * <li> it is a combining mark 10772 * <li> it is a non-spacing mark 10773 * <li> {@code isIdentifierIgnorable} returns 10774 * {@code true} for the character 10775 * </ul> 10776 * 10777 * <p><b>Note:</b> This method cannot handle <a 10778 * href="#supplementary"> supplementary characters</a>. To support 10779 * all Unicode characters, including supplementary characters, use 10780 * the {@link #isJavaIdentifierPart(int)} method. 10781 * 10782 * @param ch the character to be tested. 10783 * @return {@code true} if the character may be part of a 10784 * Java identifier; {@code false} otherwise. 10785 * @see Character#isIdentifierIgnorable(char) 10786 * @see Character#isJavaIdentifierStart(char) 10787 * @see Character#isLetterOrDigit(char) 10788 * @see Character#isUnicodeIdentifierPart(char) 10789 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10790 * @since 1.1 10791 */ 10792 @SuppressWarnings("doclint:reference") // cross-module links 10793 public static boolean isJavaIdentifierPart(char ch) { 10794 return isJavaIdentifierPart((int)ch); 10795 } 10796 10797 /** 10798 * Determines if the character (Unicode code point) may be part of a Java 10799 * identifier as other than the first character. 
10800 * <p> 10801 * A character may be part of a Java identifier if any of the following 10802 * conditions are true: 10803 * <ul> 10804 * <li> it is a letter 10805 * <li> it is a currency symbol (such as {@code '$'}) 10806 * <li> it is a connecting punctuation character (such as {@code '_'}) 10807 * <li> it is a digit 10808 * <li> it is a numeric letter (such as a Roman numeral character) 10809 * <li> it is a combining mark 10810 * <li> it is a non-spacing mark 10811 * <li> {@link #isIdentifierIgnorable(int) 10812 * isIdentifierIgnorable(codePoint)} returns {@code true} for 10813 * the code point 10814 * </ul> 10815 * 10816 * @param codePoint the character (Unicode code point) to be tested. 10817 * @return {@code true} if the character may be part of a 10818 * Java identifier; {@code false} otherwise. 10819 * @see Character#isIdentifierIgnorable(int) 10820 * @see Character#isJavaIdentifierStart(int) 10821 * @see Character#isLetterOrDigit(int) 10822 * @see Character#isUnicodeIdentifierPart(int) 10823 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10824 * @since 1.5 10825 */ 10826 @SuppressWarnings("doclint:reference") // cross-module links 10827 public static boolean isJavaIdentifierPart(int codePoint) { 10828 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 10829 } 10830 10831 /** 10832 * Determines if the specified character is permissible as the 10833 * first character in a Unicode identifier. 10834 * <p> 10835 * A character may start a Unicode identifier if and only if 10836 * one of the following conditions is true: 10837 * <ul> 10838 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10839 * <li> {@link #getType(char) getType(ch)} returns 10840 * {@code LETTER_NUMBER}. 10841 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10842 * {@code Other_ID_Start}</a> character. 
10843 * </ul> 10844 * <p> 10845 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10846 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10847 * with the following profile of UAX31: 10848 * <pre> 10849 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10850 * </pre> 10851 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10852 * compatibility. 10853 * 10854 * <p><b>Note:</b> This method cannot handle <a 10855 * href="#supplementary"> supplementary characters</a>. To support 10856 * all Unicode characters, including supplementary characters, use 10857 * the {@link #isUnicodeIdentifierStart(int)} method. 10858 * 10859 * @param ch the character to be tested. 10860 * @return {@code true} if the character may start a Unicode 10861 * identifier; {@code false} otherwise. 10862 * 10863 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10864 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10865 * @see Character#isJavaIdentifierStart(char) 10866 * @see Character#isLetter(char) 10867 * @see Character#isUnicodeIdentifierPart(char) 10868 * @since 1.1 10869 */ 10870 public static boolean isUnicodeIdentifierStart(char ch) { 10871 return isUnicodeIdentifierStart((int)ch); 10872 } 10873 10874 /** 10875 * Determines if the specified character (Unicode code point) is permissible as the 10876 * first character in a Unicode identifier. 10877 * <p> 10878 * A character may start a Unicode identifier if and only if 10879 * one of the following conditions is true: 10880 * <ul> 10881 * <li> {@link #isLetter(int) isLetter(codePoint)} 10882 * returns {@code true} 10883 * <li> {@link #getType(int) getType(codePoint)} 10884 * returns {@code LETTER_NUMBER}. 10885 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10886 * {@code Other_ID_Start}</a> character. 
10887 * </ul> 10888 * <p> 10889 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10890 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10891 * with the following profile of UAX31: 10892 * <pre> 10893 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10894 * </pre> 10895 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10896 * compatibility. 10897 * 10898 * @param codePoint the character (Unicode code point) to be tested. 10899 * @return {@code true} if the character may start a Unicode 10900 * identifier; {@code false} otherwise. 10901 * 10902 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10903 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10904 * @see Character#isJavaIdentifierStart(int) 10905 * @see Character#isLetter(int) 10906 * @see Character#isUnicodeIdentifierPart(int) 10907 * @since 1.5 10908 */ 10909 public static boolean isUnicodeIdentifierStart(int codePoint) { 10910 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 10911 } 10912 10913 /** 10914 * Determines if the specified character may be part of a Unicode 10915 * identifier as other than the first character. 10916 * <p> 10917 * A character may be part of a Unicode identifier if and only if 10918 * one of the following statements is true: 10919 * <ul> 10920 * <li> it is a letter 10921 * <li> it is a connecting punctuation character (such as {@code '_'}) 10922 * <li> it is a digit 10923 * <li> it is a numeric letter (such as a Roman numeral character) 10924 * <li> it is a combining mark 10925 * <li> it is a non-spacing mark 10926 * <li> {@code isIdentifierIgnorable} returns 10927 * {@code true} for this character. 10928 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10929 * {@code Other_ID_Start}</a> character. 
10930 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10931 * {@code Other_ID_Continue}</a> character. 10932 * </ul> 10933 * <p> 10934 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10935 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10936 * with the following profile of UAX31: 10937 * <pre> 10938 * Continue := Start + ID_Continue + ignorable 10939 * Medial := empty 10940 * ignorable := isIdentifierIgnorable(char) returns true for the character 10941 * </pre> 10942 * {@code ignorable} is added to {@code Continue} for backward 10943 * compatibility. 10944 * 10945 * <p><b>Note:</b> This method cannot handle <a 10946 * href="#supplementary"> supplementary characters</a>. To support 10947 * all Unicode characters, including supplementary characters, use 10948 * the {@link #isUnicodeIdentifierPart(int)} method. 10949 * 10950 * @param ch the character to be tested. 10951 * @return {@code true} if the character may be part of a 10952 * Unicode identifier; {@code false} otherwise. 10953 * 10954 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10955 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10956 * @see Character#isIdentifierIgnorable(char) 10957 * @see Character#isJavaIdentifierPart(char) 10958 * @see Character#isLetterOrDigit(char) 10959 * @see Character#isUnicodeIdentifierStart(char) 10960 * @since 1.1 10961 */ 10962 public static boolean isUnicodeIdentifierPart(char ch) { 10963 return isUnicodeIdentifierPart((int)ch); 10964 } 10965 10966 /** 10967 * Determines if the specified character (Unicode code point) may be part of a Unicode 10968 * identifier as other than the first character. 
10969 * <p> 10970 * A character may be part of a Unicode identifier if and only if 10971 * one of the following statements is true: 10972 * <ul> 10973 * <li> it is a letter 10974 * <li> it is a connecting punctuation character (such as {@code '_'}) 10975 * <li> it is a digit 10976 * <li> it is a numeric letter (such as a Roman numeral character) 10977 * <li> it is a combining mark 10978 * <li> it is a non-spacing mark 10979 * <li> {@code isIdentifierIgnorable} returns 10980 * {@code true} for this character. 10981 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10982 * {@code Other_ID_Start}</a> character. 10983 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10984 * {@code Other_ID_Continue}</a> character. 10985 * </ul> 10986 * <p> 10987 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10988 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10989 * with the following profile of UAX31: 10990 * <pre> 10991 * Continue := Start + ID_Continue + ignorable 10992 * Medial := empty 10993 * ignorable := isIdentifierIgnorable(int) returns true for the character 10994 * </pre> 10995 * {@code ignorable} is added to {@code Continue} for backward 10996 * compatibility. 10997 * 10998 * @param codePoint the character (Unicode code point) to be tested. 10999 * @return {@code true} if the character may be part of a 11000 * Unicode identifier; {@code false} otherwise. 
11001 * 11002 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 11003 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 11004 * @see Character#isIdentifierIgnorable(int) 11005 * @see Character#isJavaIdentifierPart(int) 11006 * @see Character#isLetterOrDigit(int) 11007 * @see Character#isUnicodeIdentifierStart(int) 11008 * @since 1.5 11009 */ 11010 public static boolean isUnicodeIdentifierPart(int codePoint) { 11011 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 11012 } 11013 11014 /** 11015 * Determines if the specified character should be regarded as 11016 * an ignorable character in a Java identifier or a Unicode identifier. 11017 * <p> 11018 * The following Unicode characters are ignorable in a Java identifier 11019 * or a Unicode identifier: 11020 * <ul> 11021 * <li>ISO control characters that are not whitespace 11022 * <ul> 11023 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 11024 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 11025 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 11026 * </ul> 11027 * 11028 * <li>all characters that have the {@code FORMAT} general 11029 * category value 11030 * </ul> 11031 * 11032 * <p><b>Note:</b> This method cannot handle <a 11033 * href="#supplementary"> supplementary characters</a>. To support 11034 * all Unicode characters, including supplementary characters, use 11035 * the {@link #isIdentifierIgnorable(int)} method. 11036 * 11037 * @param ch the character to be tested. 11038 * @return {@code true} if the character is an ignorable control 11039 * character that may be part of a Java or Unicode identifier; 11040 * {@code false} otherwise. 
11041 * @see Character#isJavaIdentifierPart(char) 11042 * @see Character#isUnicodeIdentifierPart(char) 11043 * @since 1.1 11044 */ 11045 public static boolean isIdentifierIgnorable(char ch) { 11046 return isIdentifierIgnorable((int)ch); 11047 } 11048 11049 /** 11050 * Determines if the specified character (Unicode code point) should be regarded as 11051 * an ignorable character in a Java identifier or a Unicode identifier. 11052 * <p> 11053 * The following Unicode characters are ignorable in a Java identifier 11054 * or a Unicode identifier: 11055 * <ul> 11056 * <li>ISO control characters that are not whitespace 11057 * <ul> 11058 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 11059 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 11060 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 11061 * </ul> 11062 * 11063 * <li>all characters that have the {@code FORMAT} general 11064 * category value 11065 * </ul> 11066 * 11067 * @param codePoint the character (Unicode code point) to be tested. 11068 * @return {@code true} if the character is an ignorable control 11069 * character that may be part of a Java or Unicode identifier; 11070 * {@code false} otherwise. 11071 * @see Character#isJavaIdentifierPart(int) 11072 * @see Character#isUnicodeIdentifierPart(int) 11073 * @since 1.5 11074 */ 11075 public static boolean isIdentifierIgnorable(int codePoint) { 11076 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 11077 } 11078 11079 /** 11080 * Determines if the specified character (Unicode code point) is an Emoji. 11081 * <p> 11082 * A character is considered to be an Emoji if and only if it has the {@code Emoji} 11083 * property, defined in 11084 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11085 * Unicode Emoji (Technical Standard #51)</a>. 11086 * 11087 * @param codePoint the character (Unicode code point) to be tested. 
11088 * @return {@code true} if the character is an Emoji; 11089 * {@code false} otherwise. 11090 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11091 * @since 21 11092 */ 11093 public static boolean isEmoji(int codePoint) { 11094 return CharacterData.of(codePoint).isEmoji(codePoint); 11095 } 11096 11097 /** 11098 * Determines if the specified character (Unicode code point) has the 11099 * Emoji Presentation property by default. 11100 * <p> 11101 * A character is considered to have the Emoji Presentation property if and 11102 * only if it has the {@code Emoji_Presentation} property, defined in 11103 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11104 * Unicode Emoji (Technical Standard #51)</a>. 11105 * 11106 * @param codePoint the character (Unicode code point) to be tested. 11107 * @return {@code true} if the character has the Emoji Presentation 11108 * property; {@code false} otherwise. 11109 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11110 * @since 21 11111 */ 11112 public static boolean isEmojiPresentation(int codePoint) { 11113 return CharacterData.of(codePoint).isEmojiPresentation(codePoint); 11114 } 11115 11116 /** 11117 * Determines if the specified character (Unicode code point) is an 11118 * Emoji Modifier. 11119 * <p> 11120 * A character is considered to be an Emoji Modifier if and only if it has 11121 * the {@code Emoji_Modifier} property, defined in 11122 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11123 * Unicode Emoji (Technical Standard #51)</a>. 11124 * 11125 * @param codePoint the character (Unicode code point) to be tested. 11126 * @return {@code true} if the character is an Emoji Modifier; 11127 * {@code false} otherwise. 
11128 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11129 * @since 21 11130 */ 11131 public static boolean isEmojiModifier(int codePoint) { 11132 return CharacterData.of(codePoint).isEmojiModifier(codePoint); 11133 } 11134 11135 /** 11136 * Determines if the specified character (Unicode code point) is an 11137 * Emoji Modifier Base. 11138 * <p> 11139 * A character is considered to be an Emoji Modifier Base if and only if it has 11140 * the {@code Emoji_Modifier_Base} property, defined in 11141 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11142 * Unicode Emoji (Technical Standard #51)</a>. 11143 * 11144 * @param codePoint the character (Unicode code point) to be tested. 11145 * @return {@code true} if the character is an Emoji Modifier Base; 11146 * {@code false} otherwise. 11147 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11148 * @since 21 11149 */ 11150 public static boolean isEmojiModifierBase(int codePoint) { 11151 return CharacterData.of(codePoint).isEmojiModifierBase(codePoint); 11152 } 11153 11154 /** 11155 * Determines if the specified character (Unicode code point) is an 11156 * Emoji Component. 11157 * <p> 11158 * A character is considered to be an Emoji Component if and only if it has 11159 * the {@code Emoji_Component} property, defined in 11160 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11161 * Unicode Emoji (Technical Standard #51)</a>. 11162 * 11163 * @param codePoint the character (Unicode code point) to be tested. 11164 * @return {@code true} if the character is an Emoji Component; 11165 * {@code false} otherwise. 11166 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11167 * @since 21 11168 */ 11169 public static boolean isEmojiComponent(int codePoint) { 11170 return CharacterData.of(codePoint).isEmojiComponent(codePoint); 11171 } 11172 11173 /** 11174 * Determines if the specified character (Unicode code point) is 11175 * an Extended Pictographic. 
11176 * <p> 11177 * A character is considered to be an Extended Pictographic if and only if it has 11178 * the {@code Extended_Pictographic} property, defined in 11179 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11180 * Unicode Emoji (Technical Standard #51)</a>. 11181 * 11182 * @param codePoint the character (Unicode code point) to be tested. 11183 * @return {@code true} if the character is an Extended Pictographic; 11184 * {@code false} otherwise. 11185 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11186 * @since 21 11187 */ 11188 public static boolean isExtendedPictographic(int codePoint) { 11189 return CharacterData.of(codePoint).isExtendedPictographic(codePoint); 11190 } 11191 11192 /** 11193 * Converts the character argument to lowercase using case 11194 * mapping information from the UnicodeData file. 11195 * <p> 11196 * Note that 11197 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 11198 * does not always return {@code true} for some ranges of 11199 * characters, particularly those that are symbols or ideographs. 11200 * 11201 * <p>In general, {@link String#toLowerCase()} should be used to map 11202 * characters to lowercase. {@code String} case mapping methods 11203 * have several benefits over {@code Character} case mapping methods. 11204 * {@code String} case mapping methods can perform locale-sensitive 11205 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11206 * the {@code Character} case mapping methods cannot. 11207 * 11208 * <p><b>Note:</b> This method cannot handle <a 11209 * href="#supplementary"> supplementary characters</a>. To support 11210 * all Unicode characters, including supplementary characters, use 11211 * the {@link #toLowerCase(int)} method. 11212 * 11213 * @param ch the character to be converted. 11214 * @return the lowercase equivalent of the character, if any; 11215 * otherwise, the character itself. 
11216 * @see Character#isLowerCase(char) 11217 * @see String#toLowerCase() 11218 */ 11219 public static char toLowerCase(char ch) { 11220 return (char)toLowerCase((int)ch); 11221 } 11222 11223 /** 11224 * Converts the character (Unicode code point) argument to 11225 * lowercase using case mapping information from the UnicodeData 11226 * file. 11227 * 11228 * <p> Note that 11229 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 11230 * does not always return {@code true} for some ranges of 11231 * characters, particularly those that are symbols or ideographs. 11232 * 11233 * <p>In general, {@link String#toLowerCase()} should be used to map 11234 * characters to lowercase. {@code String} case mapping methods 11235 * have several benefits over {@code Character} case mapping methods. 11236 * {@code String} case mapping methods can perform locale-sensitive 11237 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11238 * the {@code Character} case mapping methods cannot. 11239 * 11240 * @param codePoint the character (Unicode code point) to be converted. 11241 * @return the lowercase equivalent of the character (Unicode code 11242 * point), if any; otherwise, the character itself. 11243 * @see Character#isLowerCase(int) 11244 * @see String#toLowerCase() 11245 * 11246 * @since 1.5 11247 */ 11248 public static int toLowerCase(int codePoint) { 11249 return CharacterData.of(codePoint).toLowerCase(codePoint); 11250 } 11251 11252 /** 11253 * Converts the character argument to uppercase using case mapping 11254 * information from the UnicodeData file. 11255 * <p> 11256 * Note that 11257 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 11258 * does not always return {@code true} for some ranges of 11259 * characters, particularly those that are symbols or ideographs. 11260 * 11261 * <p>In general, {@link String#toUpperCase()} should be used to map 11262 * characters to uppercase. 
{@code String} case mapping methods 11263 * have several benefits over {@code Character} case mapping methods. 11264 * {@code String} case mapping methods can perform locale-sensitive 11265 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11266 * the {@code Character} case mapping methods cannot. 11267 * 11268 * <p><b>Note:</b> This method cannot handle <a 11269 * href="#supplementary"> supplementary characters</a>. To support 11270 * all Unicode characters, including supplementary characters, use 11271 * the {@link #toUpperCase(int)} method. 11272 * 11273 * @param ch the character to be converted. 11274 * @return the uppercase equivalent of the character, if any; 11275 * otherwise, the character itself. 11276 * @see Character#isUpperCase(char) 11277 * @see String#toUpperCase() 11278 */ 11279 public static char toUpperCase(char ch) { 11280 return (char)toUpperCase((int)ch); 11281 } 11282 11283 /** 11284 * Converts the character (Unicode code point) argument to 11285 * uppercase using case mapping information from the UnicodeData 11286 * file. 11287 * 11288 * <p>Note that 11289 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 11290 * does not always return {@code true} for some ranges of 11291 * characters, particularly those that are symbols or ideographs. 11292 * 11293 * <p>In general, {@link String#toUpperCase()} should be used to map 11294 * characters to uppercase. {@code String} case mapping methods 11295 * have several benefits over {@code Character} case mapping methods. 11296 * {@code String} case mapping methods can perform locale-sensitive 11297 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11298 * the {@code Character} case mapping methods cannot. 11299 * 11300 * @param codePoint the character (Unicode code point) to be converted. 11301 * @return the uppercase equivalent of the character, if any; 11302 * otherwise, the character itself. 
11303 * @see Character#isUpperCase(int) 11304 * @see String#toUpperCase() 11305 * 11306 * @since 1.5 11307 */ 11308 public static int toUpperCase(int codePoint) { 11309 return CharacterData.of(codePoint).toUpperCase(codePoint); 11310 } 11311 11312 /** 11313 * Converts the character argument to titlecase using case mapping 11314 * information from the UnicodeData file. If a character has no 11315 * explicit titlecase mapping and is not itself a titlecase char 11316 * according to UnicodeData, then the uppercase mapping is 11317 * returned as an equivalent titlecase mapping. If the 11318 * {@code char} argument is already a titlecase 11319 * {@code char}, the same {@code char} value will be 11320 * returned. 11321 * <p> 11322 * Note that 11323 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 11324 * does not always return {@code true} for some ranges of 11325 * characters. 11326 * 11327 * <p><b>Note:</b> This method cannot handle <a 11328 * href="#supplementary"> supplementary characters</a>. To support 11329 * all Unicode characters, including supplementary characters, use 11330 * the {@link #toTitleCase(int)} method. 11331 * 11332 * @param ch the character to be converted. 11333 * @return the titlecase equivalent of the character, if any; 11334 * otherwise, the character itself. 11335 * @see Character#isTitleCase(char) 11336 * @see Character#toLowerCase(char) 11337 * @see Character#toUpperCase(char) 11338 * @since 1.0.2 11339 */ 11340 public static char toTitleCase(char ch) { 11341 return (char)toTitleCase((int)ch); 11342 } 11343 11344 /** 11345 * Converts the character (Unicode code point) argument to titlecase using case mapping 11346 * information from the UnicodeData file. If a character has no 11347 * explicit titlecase mapping and is not itself a titlecase char 11348 * according to UnicodeData, then the uppercase mapping is 11349 * returned as an equivalent titlecase mapping. 
If the 11350 * character argument is already a titlecase 11351 * character, the same character value will be 11352 * returned. 11353 * 11354 * <p>Note that 11355 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 11356 * does not always return {@code true} for some ranges of 11357 * characters. 11358 * 11359 * @param codePoint the character (Unicode code point) to be converted. 11360 * @return the titlecase equivalent of the character, if any; 11361 * otherwise, the character itself. 11362 * @see Character#isTitleCase(int) 11363 * @see Character#toLowerCase(int) 11364 * @see Character#toUpperCase(int) 11365 * @since 1.5 11366 */ 11367 public static int toTitleCase(int codePoint) { 11368 return CharacterData.of(codePoint).toTitleCase(codePoint); 11369 } 11370 11371 /** 11372 * Returns the numeric value of the character {@code ch} in the 11373 * specified radix. 11374 * <p> 11375 * If the radix is not in the range {@code MIN_RADIX} ≤ 11376 * {@code radix} ≤ {@code MAX_RADIX} or if the 11377 * value of {@code ch} is not a valid digit in the specified 11378 * radix, {@code -1} is returned. A character is a valid digit 11379 * if at least one of the following is true: 11380 * <ul> 11381 * <li>The method {@code isDigit} is {@code true} of the character 11382 * and the Unicode decimal digit value of the character (or its 11383 * single-character decomposition) is less than the specified radix. 11384 * In this case the decimal digit value is returned. 11385 * <li>The character is one of the uppercase Latin letters 11386 * {@code 'A'} through {@code 'Z'} and its code is less than 11387 * {@code radix + 'A' - 10}. 11388 * In this case, {@code ch - 'A' + 10} 11389 * is returned. 11390 * <li>The character is one of the lowercase Latin letters 11391 * {@code 'a'} through {@code 'z'} and its code is less than 11392 * {@code radix + 'a' - 10}. 11393 * In this case, {@code ch - 'a' + 10} 11394 * is returned. 
11395 * <li>The character is one of the fullwidth uppercase Latin letters A 11396 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11397 * and its code is less than 11398 * {@code radix + '\u005CuFF21' - 10}. 11399 * In this case, {@code ch - '\u005CuFF21' + 10} 11400 * is returned. 11401 * <li>The character is one of the fullwidth lowercase Latin letters a 11402 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11403 * and its code is less than 11404 * {@code radix + '\u005CuFF41' - 10}. 11405 * In this case, {@code ch - '\u005CuFF41' + 10} 11406 * is returned. 11407 * </ul> 11408 * 11409 * <p><b>Note:</b> This method cannot handle <a 11410 * href="#supplementary"> supplementary characters</a>. To support 11411 * all Unicode characters, including supplementary characters, use 11412 * the {@link #digit(int, int)} method. 11413 * 11414 * @param ch the character to be converted. 11415 * @param radix the radix. 11416 * @return the numeric value represented by the character in the 11417 * specified radix. 11418 * @see Character#forDigit(int, int) 11419 * @see Character#isDigit(char) 11420 */ 11421 public static int digit(char ch, int radix) { 11422 return digit((int)ch, radix); 11423 } 11424 11425 /** 11426 * Returns the numeric value of the specified character (Unicode 11427 * code point) in the specified radix. 11428 * 11429 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 11430 * {@code radix} ≤ {@code MAX_RADIX} or if the 11431 * character is not a valid digit in the specified 11432 * radix, {@code -1} is returned. A character is a valid digit 11433 * if at least one of the following is true: 11434 * <ul> 11435 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 11436 * and the Unicode decimal digit value of the character (or its 11437 * single-character decomposition) is less than the specified radix. 11438 * In this case the decimal digit value is returned. 
11439 * <li>The character is one of the uppercase Latin letters 11440 * {@code 'A'} through {@code 'Z'} and its code is less than 11441 * {@code radix + 'A' - 10}. 11442 * In this case, {@code codePoint - 'A' + 10} 11443 * is returned. 11444 * <li>The character is one of the lowercase Latin letters 11445 * {@code 'a'} through {@code 'z'} and its code is less than 11446 * {@code radix + 'a' - 10}. 11447 * In this case, {@code codePoint - 'a' + 10} 11448 * is returned. 11449 * <li>The character is one of the fullwidth uppercase Latin letters A 11450 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11451 * and its code is less than 11452 * {@code radix + '\u005CuFF21' - 10}. 11453 * In this case, 11454 * {@code codePoint - '\u005CuFF21' + 10} 11455 * is returned. 11456 * <li>The character is one of the fullwidth lowercase Latin letters a 11457 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11458 * and its code is less than 11459 * {@code radix + '\u005CuFF41'- 10}. 11460 * In this case, 11461 * {@code codePoint - '\u005CuFF41' + 10} 11462 * is returned. 11463 * </ul> 11464 * 11465 * @param codePoint the character (Unicode code point) to be converted. 11466 * @param radix the radix. 11467 * @return the numeric value represented by the character in the 11468 * specified radix. 11469 * @see Character#forDigit(int, int) 11470 * @see Character#isDigit(int) 11471 * @since 1.5 11472 */ 11473 public static int digit(int codePoint, int radix) { 11474 return CharacterData.of(codePoint).digit(codePoint, radix); 11475 } 11476 11477 /** 11478 * Returns the {@code int} value that the specified Unicode 11479 * character represents. For example, the character 11480 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 11481 * an int with a value of 50. 
11482 * <p> 11483 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11484 * {@code '\u005Cu005A'}), lowercase 11485 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11486 * full width variant ({@code '\u005CuFF21'} through 11487 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11488 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11489 * through 35. This is independent of the Unicode specification, 11490 * which does not assign numeric values to these {@code char} 11491 * values. 11492 * <p> 11493 * If the character does not have a numeric value, then -1 is returned. 11494 * If the character has a numeric value that cannot be represented as a 11495 * nonnegative integer (for example, a fractional value), then -2 11496 * is returned. 11497 * 11498 * <p><b>Note:</b> This method cannot handle <a 11499 * href="#supplementary"> supplementary characters</a>. To support 11500 * all Unicode characters, including supplementary characters, use 11501 * the {@link #getNumericValue(int)} method. 11502 * 11503 * @param ch the character to be converted. 11504 * @return the numeric value of the character, as a nonnegative {@code int} 11505 * value; -2 if the character has a numeric value but the value 11506 * can not be represented as a nonnegative {@code int} value; 11507 * -1 if the character has no numeric value. 11508 * @see Character#forDigit(int, int) 11509 * @see Character#isDigit(char) 11510 * @since 1.1 11511 */ 11512 public static int getNumericValue(char ch) { 11513 return getNumericValue((int)ch); 11514 } 11515 11516 /** 11517 * Returns the {@code int} value that the specified 11518 * character (Unicode code point) represents. For example, the character 11519 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 11520 * an {@code int} with a value of 50. 
11521 * <p> 11522 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11523 * {@code '\u005Cu005A'}), lowercase 11524 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11525 * full width variant ({@code '\u005CuFF21'} through 11526 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11527 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11528 * through 35. This is independent of the Unicode specification, 11529 * which does not assign numeric values to these {@code char} 11530 * values. 11531 * <p> 11532 * If the character does not have a numeric value, then -1 is returned. 11533 * If the character has a numeric value that cannot be represented as a 11534 * nonnegative integer (for example, a fractional value), then -2 11535 * is returned. 11536 * 11537 * @param codePoint the character (Unicode code point) to be converted. 11538 * @return the numeric value of the character, as a nonnegative {@code int} 11539 * value; -2 if the character has a numeric value but the value 11540 * can not be represented as a nonnegative {@code int} value; 11541 * -1 if the character has no numeric value. 11542 * @see Character#forDigit(int, int) 11543 * @see Character#isDigit(int) 11544 * @since 1.5 11545 */ 11546 public static int getNumericValue(int codePoint) { 11547 return CharacterData.of(codePoint).getNumericValue(codePoint); 11548 } 11549 11550 /** 11551 * Determines if the specified character is ISO-LATIN-1 white space. 
11552 * This method returns {@code true} for the following five 11553 * characters only: 11554 * <table class="striped"> 11555 * <caption style="display:none">truechars</caption> 11556 * <thead> 11557 * <tr><th scope="col">Character 11558 * <th scope="col">Code 11559 * <th scope="col">Name 11560 * </thead> 11561 * <tbody> 11562 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td> 11563 * <td>{@code HORIZONTAL TABULATION}</td></tr> 11564 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td> 11565 * <td>{@code NEW LINE}</td></tr> 11566 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td> 11567 * <td>{@code FORM FEED}</td></tr> 11568 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td> 11569 * <td>{@code CARRIAGE RETURN}</td></tr> 11570 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td> 11571 * <td>{@code SPACE}</td></tr> 11572 * </tbody> 11573 * </table> 11574 * 11575 * @param ch the character to be tested. 11576 * @return {@code true} if the character is ISO-LATIN-1 white 11577 * space; {@code false} otherwise. 11578 * @see Character#isSpaceChar(char) 11579 * @see Character#isWhitespace(char) 11580 * @deprecated Replaced by isWhitespace(char). 11581 */ 11582 @Deprecated(since="1.1") 11583 public static boolean isSpace(char ch) { 11584 return (ch <= 0x0020) && 11585 (((((1L << 0x0009) | 11586 (1L << 0x000A) | 11587 (1L << 0x000C) | 11588 (1L << 0x000D) | 11589 (1L << 0x0020)) >> ch) & 1L) != 0); 11590 } 11591 11592 11593 /** 11594 * Determines if the specified character is a Unicode space character. 11595 * A character is considered to be a space character if and only if 11596 * it is specified to be a space character by the Unicode Standard. 
This 11597 * method returns true if the character's general category type is any of 11598 * the following: 11599 * <ul> 11600 * <li> {@code SPACE_SEPARATOR} 11601 * <li> {@code LINE_SEPARATOR} 11602 * <li> {@code PARAGRAPH_SEPARATOR} 11603 * </ul> 11604 * 11605 * <p><b>Note:</b> This method cannot handle <a 11606 * href="#supplementary"> supplementary characters</a>. To support 11607 * all Unicode characters, including supplementary characters, use 11608 * the {@link #isSpaceChar(int)} method. 11609 * 11610 * @param ch the character to be tested. 11611 * @return {@code true} if the character is a space character; 11612 * {@code false} otherwise. 11613 * @see Character#isWhitespace(char) 11614 * @since 1.1 11615 */ 11616 public static boolean isSpaceChar(char ch) { 11617 return isSpaceChar((int)ch); 11618 } 11619 11620 /** 11621 * Determines if the specified character (Unicode code point) is a 11622 * Unicode space character. A character is considered to be a 11623 * space character if and only if it is specified to be a space 11624 * character by the Unicode Standard. This method returns true if 11625 * the character's general category type is any of the following: 11626 * 11627 * <ul> 11628 * <li> {@link #SPACE_SEPARATOR} 11629 * <li> {@link #LINE_SEPARATOR} 11630 * <li> {@link #PARAGRAPH_SEPARATOR} 11631 * </ul> 11632 * 11633 * @param codePoint the character (Unicode code point) to be tested. 11634 * @return {@code true} if the character is a space character; 11635 * {@code false} otherwise. 11636 * @see Character#isWhitespace(int) 11637 * @since 1.5 11638 */ 11639 public static boolean isSpaceChar(int codePoint) { 11640 return ((((1 << Character.SPACE_SEPARATOR) | 11641 (1 << Character.LINE_SEPARATOR) | 11642 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 11643 != 0; 11644 } 11645 11646 /** 11647 * Determines if the specified character is white space according to Java. 
11648 * A character is a Java whitespace character if and only if it satisfies 11649 * one of the following criteria: 11650 * <ul> 11651 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 11652 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 11653 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11654 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11655 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11656 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11657 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11658 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11659 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11660 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11661 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11662 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11663 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11664 * </ul> 11665 * 11666 * <p><b>Note:</b> This method cannot handle <a 11667 * href="#supplementary"> supplementary characters</a>. To support 11668 * all Unicode characters, including supplementary characters, use 11669 * the {@link #isWhitespace(int)} method. 11670 * 11671 * @param ch the character to be tested. 11672 * @return {@code true} if the character is a Java whitespace 11673 * character; {@code false} otherwise. 11674 * @see Character#isSpaceChar(char) 11675 * @since 1.1 11676 */ 11677 public static boolean isWhitespace(char ch) { 11678 return isWhitespace((int)ch); 11679 } 11680 11681 /** 11682 * Determines if the specified character (Unicode code point) is 11683 * white space according to Java. 
A character is a Java 11684 * whitespace character if and only if it satisfies one of the 11685 * following criteria: 11686 * <ul> 11687 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 11688 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 11689 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11690 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11691 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11692 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11693 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11694 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11695 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11696 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11697 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11698 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11699 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11700 * </ul> 11701 * 11702 * @param codePoint the character (Unicode code point) to be tested. 11703 * @return {@code true} if the character is a Java whitespace 11704 * character; {@code false} otherwise. 11705 * @see Character#isSpaceChar(int) 11706 * @since 1.5 11707 */ 11708 public static boolean isWhitespace(int codePoint) { 11709 return CharacterData.of(codePoint).isWhitespace(codePoint); 11710 } 11711 11712 /** 11713 * Determines if the specified character is an ISO control 11714 * character. A character is considered to be an ISO control 11715 * character if its code is in the range {@code '\u005Cu0000'} 11716 * through {@code '\u005Cu001F'} or in the range 11717 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11718 * 11719 * <p><b>Note:</b> This method cannot handle <a 11720 * href="#supplementary"> supplementary characters</a>. To support 11721 * all Unicode characters, including supplementary characters, use 11722 * the {@link #isISOControl(int)} method. 
11723 * 11724 * @param ch the character to be tested. 11725 * @return {@code true} if the character is an ISO control character; 11726 * {@code false} otherwise. 11727 * 11728 * @see Character#isSpaceChar(char) 11729 * @see Character#isWhitespace(char) 11730 * @since 1.1 11731 */ 11732 public static boolean isISOControl(char ch) { 11733 return isISOControl((int)ch); 11734 } 11735 11736 /** 11737 * Determines if the referenced character (Unicode code point) is an ISO control 11738 * character. A character is considered to be an ISO control 11739 * character if its code is in the range {@code '\u005Cu0000'} 11740 * through {@code '\u005Cu001F'} or in the range 11741 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11742 * 11743 * @param codePoint the character (Unicode code point) to be tested. 11744 * @return {@code true} if the character is an ISO control character; 11745 * {@code false} otherwise. 11746 * @see Character#isSpaceChar(int) 11747 * @see Character#isWhitespace(int) 11748 * @since 1.5 11749 */ 11750 public static boolean isISOControl(int codePoint) { 11751 // Optimized form of: 11752 // (codePoint >= 0x00 && codePoint <= 0x1F) || 11753 // (codePoint >= 0x7F && codePoint <= 0x9F); 11754 return codePoint <= 0x9F && 11755 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 11756 } 11757 11758 /** 11759 * Returns a value indicating a character's general category. 11760 * 11761 * <p><b>Note:</b> This method cannot handle <a 11762 * href="#supplementary"> supplementary characters</a>. To support 11763 * all Unicode characters, including supplementary characters, use 11764 * the {@link #getType(int)} method. 11765 * 11766 * @param ch the character to be tested. 11767 * @return a value of type {@code int} representing the 11768 * character's general category. 
11769 * @see Character#COMBINING_SPACING_MARK 11770 * @see Character#CONNECTOR_PUNCTUATION 11771 * @see Character#CONTROL 11772 * @see Character#CURRENCY_SYMBOL 11773 * @see Character#DASH_PUNCTUATION 11774 * @see Character#DECIMAL_DIGIT_NUMBER 11775 * @see Character#ENCLOSING_MARK 11776 * @see Character#END_PUNCTUATION 11777 * @see Character#FINAL_QUOTE_PUNCTUATION 11778 * @see Character#FORMAT 11779 * @see Character#INITIAL_QUOTE_PUNCTUATION 11780 * @see Character#LETTER_NUMBER 11781 * @see Character#LINE_SEPARATOR 11782 * @see Character#LOWERCASE_LETTER 11783 * @see Character#MATH_SYMBOL 11784 * @see Character#MODIFIER_LETTER 11785 * @see Character#MODIFIER_SYMBOL 11786 * @see Character#NON_SPACING_MARK 11787 * @see Character#OTHER_LETTER 11788 * @see Character#OTHER_NUMBER 11789 * @see Character#OTHER_PUNCTUATION 11790 * @see Character#OTHER_SYMBOL 11791 * @see Character#PARAGRAPH_SEPARATOR 11792 * @see Character#PRIVATE_USE 11793 * @see Character#SPACE_SEPARATOR 11794 * @see Character#START_PUNCTUATION 11795 * @see Character#SURROGATE 11796 * @see Character#TITLECASE_LETTER 11797 * @see Character#UNASSIGNED 11798 * @see Character#UPPERCASE_LETTER 11799 * @since 1.1 11800 */ 11801 public static int getType(char ch) { 11802 return getType((int)ch); 11803 } 11804 11805 /** 11806 * Returns a value indicating a character's general category. 11807 * 11808 * @param codePoint the character (Unicode code point) to be tested. 11809 * @return a value of type {@code int} representing the 11810 * character's general category. 
11811 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 11812 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 11813 * @see Character#CONTROL CONTROL 11814 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 11815 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 11816 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 11817 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 11818 * @see Character#END_PUNCTUATION END_PUNCTUATION 11819 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 11820 * @see Character#FORMAT FORMAT 11821 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 11822 * @see Character#LETTER_NUMBER LETTER_NUMBER 11823 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 11824 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 11825 * @see Character#MATH_SYMBOL MATH_SYMBOL 11826 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 11827 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 11828 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 11829 * @see Character#OTHER_LETTER OTHER_LETTER 11830 * @see Character#OTHER_NUMBER OTHER_NUMBER 11831 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 11832 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 11833 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 11834 * @see Character#PRIVATE_USE PRIVATE_USE 11835 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 11836 * @see Character#START_PUNCTUATION START_PUNCTUATION 11837 * @see Character#SURROGATE SURROGATE 11838 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 11839 * @see Character#UNASSIGNED UNASSIGNED 11840 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 11841 * @since 1.5 11842 */ 11843 public static int getType(int codePoint) { 11844 return CharacterData.of(codePoint).getType(codePoint); 11845 } 11846 11847 /** 11848 * Determines the character representation for a specific digit in 11849 * the specified radix. 
If the value of {@code radix} is not a 11850 * valid radix, or the value of {@code digit} is not a valid 11851 * digit in the specified radix, the null character 11852 * ({@code '\u005Cu0000'}) is returned. 11853 * <p> 11854 * The {@code radix} argument is valid if it is greater than or 11855 * equal to {@code MIN_RADIX} and less than or equal to 11856 * {@code MAX_RADIX}. The {@code digit} argument is valid if 11857 * {@code 0 <= digit < radix}. 11858 * <p> 11859 * If the digit is less than 10, then 11860 * {@code '0' + digit} is returned. Otherwise, the value 11861 * {@code 'a' + digit - 10} is returned. 11862 * 11863 * @param digit the number to convert to a character. 11864 * @param radix the radix. 11865 * @return the {@code char} representation of the specified digit 11866 * in the specified radix. 11867 * @see Character#MIN_RADIX 11868 * @see Character#MAX_RADIX 11869 * @see Character#digit(char, int) 11870 */ 11871 public static char forDigit(int digit, int radix) { 11872 if ((digit >= radix) || (digit < 0)) { 11873 return '\0'; 11874 } 11875 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 11876 return '\0'; 11877 } 11878 if (digit < 10) { 11879 return (char)('0' + digit); 11880 } 11881 return (char)('a' - 10 + digit); 11882 } 11883 11884 /** 11885 * Returns the Unicode directionality property for the given 11886 * character. Character directionality is used to calculate the 11887 * visual ordering of text. The directionality value of undefined 11888 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 11889 * 11890 * <p><b>Note:</b> This method cannot handle <a 11891 * href="#supplementary"> supplementary characters</a>. To support 11892 * all Unicode characters, including supplementary characters, use 11893 * the {@link #getDirectionality(int)} method. 11894 * 11895 * @param ch {@code char} for which the directionality property 11896 * is requested. 11897 * @return the directionality property of the {@code char} value. 
11898 * 11899 * @see Character#DIRECTIONALITY_UNDEFINED 11900 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 11901 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 11902 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11903 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 11904 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11905 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11906 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 11907 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11908 * @see Character#DIRECTIONALITY_NONSPACING_MARK 11909 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 11910 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 11911 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 11912 * @see Character#DIRECTIONALITY_WHITESPACE 11913 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 11914 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11915 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11916 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11917 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11918 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11919 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11920 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11921 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE 11922 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11923 * @since 1.4 11924 */ 11925 public static byte getDirectionality(char ch) { 11926 return getDirectionality((int)ch); 11927 } 11928 11929 /** 11930 * Returns the Unicode directionality property for the given 11931 * character (Unicode code point). Character directionality is 11932 * used to calculate the visual ordering of text. The 11933 * directionality value of undefined character is {@link 11934 * #DIRECTIONALITY_UNDEFINED}. 11935 * 11936 * @param codePoint the character (Unicode code point) for which 11937 * the directionality property is requested. 
11938 * @return the directionality property of the character. 11939 * 11940 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 11941 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 11942 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 11943 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11944 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 11945 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11946 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11947 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 11948 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11949 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 11950 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 11951 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 11952 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 11953 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 11954 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 11955 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11956 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11957 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11958 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11959 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11960 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11961 * @see 
Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11962 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE 11963 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11964 * @since 1.5 11965 */ 11966 public static byte getDirectionality(int codePoint) { 11967 return CharacterData.of(codePoint).getDirectionality(codePoint); 11968 } 11969 11970 /** 11971 * Determines whether the character is mirrored according to the 11972 * Unicode specification. Mirrored characters should have their 11973 * glyphs horizontally mirrored when displayed in text that is 11974 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 11975 * PARENTHESIS is semantically defined to be an <i>opening 11976 * parenthesis</i>. This will appear as a "(" in text that is 11977 * left-to-right but as a ")" in text that is right-to-left. 11978 * 11979 * <p><b>Note:</b> This method cannot handle <a 11980 * href="#supplementary"> supplementary characters</a>. To support 11981 * all Unicode characters, including supplementary characters, use 11982 * the {@link #isMirrored(int)} method. 11983 * 11984 * @param ch {@code char} for which the mirrored property is requested 11985 * @return {@code true} if the char is mirrored, {@code false} 11986 * if the {@code char} is not mirrored or is not defined. 11987 * @since 1.4 11988 */ 11989 public static boolean isMirrored(char ch) { 11990 return isMirrored((int)ch); 11991 } 11992 11993 /** 11994 * Determines whether the specified character (Unicode code point) 11995 * is mirrored according to the Unicode specification. Mirrored 11996 * characters should have their glyphs horizontally mirrored when 11997 * displayed in text that is right-to-left. For example, 11998 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 11999 * defined to be an <i>opening parenthesis</i>. 
This will appear 12000 * as a "(" in text that is left-to-right but as a ")" in text 12001 * that is right-to-left. 12002 * 12003 * @param codePoint the character (Unicode code point) to be tested. 12004 * @return {@code true} if the character is mirrored, {@code false} 12005 * if the character is not mirrored or is not defined. 12006 * @since 1.5 12007 */ 12008 public static boolean isMirrored(int codePoint) { 12009 return CharacterData.of(codePoint).isMirrored(codePoint); 12010 } 12011 12012 /** 12013 * Compares two {@code Character} objects numerically. 12014 * 12015 * @param anotherCharacter the {@code Character} to be compared. 12016 * @return the value {@code 0} if the argument {@code Character} 12017 * is equal to this {@code Character}; a value less than 12018 * {@code 0} if this {@code Character} is numerically less 12019 * than the {@code Character} argument; and a value greater than 12020 * {@code 0} if this {@code Character} is numerically greater 12021 * than the {@code Character} argument (unsigned comparison). 12022 * Note that this is strictly a numerical comparison; it is not 12023 * locale-dependent. 12024 * @since 1.2 12025 */ 12026 public int compareTo(Character anotherCharacter) { 12027 return compare(this.value, anotherCharacter.value); 12028 } 12029 12030 /** 12031 * Compares two {@code char} values numerically. 
12032 * The value returned is identical to what would be returned by: 12033 * <pre> 12034 * Character.valueOf(x).compareTo(Character.valueOf(y)) 12035 * </pre> 12036 * 12037 * @param x the first {@code char} to compare 12038 * @param y the second {@code char} to compare 12039 * @return the value {@code 0} if {@code x == y}; 12040 * a value less than {@code 0} if {@code x < y}; and 12041 * a value greater than {@code 0} if {@code x > y} 12042 * @since 1.7 12043 */ 12044 public static int compare(char x, char y) { 12045 return x - y; 12046 } 12047 12048 /** 12049 * Converts the character (Unicode code point) argument to uppercase using 12050 * information from the UnicodeData file. 12051 * 12052 * @param codePoint the character (Unicode code point) to be converted. 12053 * @return either the uppercase equivalent of the character, if 12054 * any, or an error flag ({@code Character.ERROR}) 12055 * that indicates that a 1:M {@code char} mapping exists. 12056 * @see Character#isLowerCase(char) 12057 * @see Character#isUpperCase(char) 12058 * @see Character#toLowerCase(char) 12059 * @see Character#toTitleCase(char) 12060 * @since 1.4 12061 */ 12062 static int toUpperCaseEx(int codePoint) { 12063 assert isValidCodePoint(codePoint); 12064 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 12065 } 12066 12067 /** 12068 * Converts the character (Unicode code point) argument to uppercase using case 12069 * mapping information from the SpecialCasing file in the Unicode 12070 * specification. If a character has no explicit uppercase 12071 * mapping, then the {@code char} itself is returned in the 12072 * {@code char[]}. 12073 * 12074 * @param codePoint the character (Unicode code point) to be converted. 12075 * @return a {@code char[]} with the uppercased character. 12076 * @since 1.4 12077 */ 12078 static char[] toUpperCaseCharArray(int codePoint) { 12079 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 
12080 assert isBmpCodePoint(codePoint); 12081 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 12082 } 12083 12084 /** 12085 * The number of bits used to represent a {@code char} value in unsigned 12086 * binary form, constant {@code 16}. 12087 * 12088 * @since 1.5 12089 */ 12090 public static final int SIZE = 16; 12091 12092 /** 12093 * The number of bytes used to represent a {@code char} value in unsigned 12094 * binary form. 12095 * 12096 * @since 1.8 12097 */ 12098 public static final int BYTES = SIZE / Byte.SIZE; 12099 12100 /** 12101 * Returns the value obtained by reversing the order of the bytes in the 12102 * specified {@code char} value. 12103 * 12104 * @param ch The {@code char} of which to reverse the byte order. 12105 * @return the value obtained by reversing (or, equivalently, swapping) 12106 * the bytes in the specified {@code char} value. 12107 * @since 1.5 12108 */ 12109 @IntrinsicCandidate 12110 public static char reverseBytes(char ch) { 12111 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 12112 } 12113 12114 /** 12115 * Returns the name of the specified character 12116 * {@code codePoint}, or null if the code point is 12117 * {@link #UNASSIGNED unassigned}. 12118 * <p> 12119 * If the specified character is not assigned a name by 12120 * the <i>UnicodeData</i> file (part of the Unicode Character 12121 * Database maintained by the Unicode Consortium), the returned 12122 * name is the same as the result of the expression: 12123 * 12124 * <blockquote>{@code 12125 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 12126 * + " " 12127 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12128 * 12129 * }</blockquote> 12130 * 12131 * For the {@code codePoint}s in the <i>UnicodeData</i> file, the name 12132 * returned by this method follows the naming scheme in the 12133 * "Unicode Name Property" section of the Unicode Standard. 
For other 12134 * code points, such as Hangul/Ideographs, The name generation rule above 12135 * differs from the one defined in the Unicode Standard. 12136 * 12137 * @param codePoint the character (Unicode code point) 12138 * 12139 * @return the name of the specified character, or null if 12140 * the code point is unassigned. 12141 * 12142 * @throws IllegalArgumentException if the specified 12143 * {@code codePoint} is not a valid Unicode 12144 * code point. 12145 * 12146 * @since 1.7 12147 */ 12148 public static String getName(int codePoint) { 12149 if (!isValidCodePoint(codePoint)) { 12150 throw new IllegalArgumentException( 12151 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 12152 } 12153 String name = CharacterName.getInstance().getName(codePoint); 12154 if (name != null) 12155 return name; 12156 if (getType(codePoint) == UNASSIGNED) 12157 return null; 12158 UnicodeBlock block = UnicodeBlock.of(codePoint); 12159 if (block != null) 12160 return block.toString().replace('_', ' ') + " " 12161 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12162 // should never come here 12163 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12164 } 12165 12166 /** 12167 * Returns the code point value of the Unicode character specified by 12168 * the given character name. 12169 * <p> 12170 * If a character is not assigned a name by the <i>UnicodeData</i> 12171 * file (part of the Unicode Character Database maintained by the Unicode 12172 * Consortium), its name is defined as the result of the expression: 12173 * 12174 * <blockquote>{@code 12175 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 12176 * + " " 12177 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12178 * 12179 * }</blockquote> 12180 * <p> 12181 * The {@code name} matching is case insensitive, with any leading and 12182 * trailing whitespace character removed. 
12183 * 12184 * For the code points in the <i>UnicodeData</i> file, this method 12185 * recognizes the name which conforms to the name defined in the 12186 * "Unicode Name Property" section in the Unicode Standard. For other 12187 * code points, this method recognizes the name generated with 12188 * {@link #getName(int)} method. 12189 * 12190 * @param name the character name 12191 * 12192 * @return the code point value of the character specified by its name. 12193 * 12194 * @throws IllegalArgumentException if the specified {@code name} 12195 * is not a valid character name. 12196 * @throws NullPointerException if {@code name} is {@code null} 12197 * 12198 * @since 9 12199 */ 12200 public static int codePointOf(String name) { 12201 name = name.trim().toUpperCase(Locale.ROOT); 12202 int cp = CharacterName.getInstance().getCodePoint(name); 12203 if (cp != -1) 12204 return cp; 12205 try { 12206 int off = name.lastIndexOf(' '); 12207 if (off != -1) { 12208 cp = Integer.parseInt(name, off + 1, name.length(), 16); 12209 if (isValidCodePoint(cp) && name.equals(getName(cp))) 12210 return cp; 12211 } 12212 } catch (Exception x) {} 12213 throw new IllegalArgumentException("Unrecognized character name :" + name); 12214 } 12215 }