1 /* 2 * Copyright (c) 2002, 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26 package java.lang; 27 28 import jdk.internal.misc.CDS; 29 import jdk.internal.value.DeserializeConstructor; 30 import jdk.internal.vm.annotation.IntrinsicCandidate; 31 import jdk.internal.vm.annotation.Stable; 32 33 import java.lang.constant.Constable; 34 import java.lang.constant.DynamicConstantDesc; 35 import java.util.Arrays; 36 import java.util.HashMap; 37 import java.util.Locale; 38 import java.util.Map; 39 import java.util.Objects; 40 import java.util.Optional; 41 42 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST; 43 import static java.lang.constant.ConstantDescs.CD_char; 44 import static java.lang.constant.ConstantDescs.DEFAULT_NAME; 45 46 /** 47 * The {@code Character} class is the {@linkplain 48 * java.lang##wrapperClass wrapper class} for values of the primitive 49 * type {@code char}. An object of type {@code Character} contains a 50 * single field whose type is {@code char}. 51 * 52 * <p>In addition, this class provides a large number of static methods for 53 * determining a character's category (lowercase letter, digit, etc.) 54 * and for converting characters from uppercase to lowercase and vice 55 * versa. 56 * 57 * <h2><a id="conformance">Unicode Conformance</a></h2> 58 * <p> 59 * The fields and methods of class {@code Character} are defined in terms 60 * of character information from the Unicode Standard, specifically the 61 * <i>UnicodeData</i> file that is part of the Unicode Character Database. 62 * This file specifies properties including name and category for every 63 * assigned Unicode code point or character range. The file is available 64 * from the Unicode Consortium at 65 * <a href="http://www.unicode.org">http://www.unicode.org</a>. 66 * <p> 67 * Character information is based on the Unicode Standard, version 15.1. 68 * <p> 69 * The Java platform has supported different versions of the Unicode 70 * Standard over time. 
Upgrades to newer versions of the Unicode Standard 71 * occurred in the following Java releases, each indicating the new version: 72 * <table class="striped"> 73 * <caption style="display:none">Shows Java releases and supported Unicode versions</caption> 74 * <thead> 75 * <tr><th scope="col">Java release</th> 76 * <th scope="col">Unicode version</th></tr> 77 * </thead> 78 * <tbody> 79 * <tr><th scope="row" style="text-align:left">Java SE 22</th> 80 * <td>Unicode 15.1</td></tr> 81 * <tr><th scope="row" style="text-align:left">Java SE 20</th> 82 * <td>Unicode 15.0</td></tr> 83 * <tr><th scope="row" style="text-align:left">Java SE 19</th> 84 * <td>Unicode 14.0</td></tr> 85 * <tr><th scope="row" style="text-align:left">Java SE 15</th> 86 * <td>Unicode 13.0</td></tr> 87 * <tr><th scope="row" style="text-align:left">Java SE 13</th> 88 * <td>Unicode 12.1</td></tr> 89 * <tr><th scope="row" style="text-align:left">Java SE 12</th> 90 * <td>Unicode 11.0</td></tr> 91 * <tr><th scope="row" style="text-align:left">Java SE 11</th> 92 * <td>Unicode 10.0</td></tr> 93 * <tr><th scope="row" style="text-align:left">Java SE 9</th> 94 * <td>Unicode 8.0</td></tr> 95 * <tr><th scope="row" style="text-align:left">Java SE 8</th> 96 * <td>Unicode 6.2</td></tr> 97 * <tr><th scope="row" style="text-align:left">Java SE 7</th> 98 * <td>Unicode 6.0</td></tr> 99 * <tr><th scope="row" style="text-align:left">Java SE 5.0</th> 100 * <td>Unicode 4.0</td></tr> 101 * <tr><th scope="row" style="text-align:left">Java SE 1.4</th> 102 * <td>Unicode 3.0</td></tr> 103 * <tr><th scope="row" style="text-align:left">JDK 1.1</th> 104 * <td>Unicode 2.0</td></tr> 105 * <tr><th scope="row" style="text-align:left">JDK 1.0.2</th> 106 * <td>Unicode 1.1.5</td></tr> 107 * </tbody> 108 * </table> 109 * Variations from these base Unicode versions, such as recognized appendixes, 110 * are documented elsewhere. 
111 * <h2><a id="unicode">Unicode Character Representations</a></h2> 112 * 113 * <p>The {@code char} data type (and therefore the value that a 114 * {@code Character} object encapsulates) are based on the 115 * original Unicode specification, which defined characters as 116 * fixed-width 16-bit entities. The Unicode Standard has since been 117 * changed to allow for characters whose representation requires more 118 * than 16 bits. The range of legal <em>code point</em>s is now 119 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 120 * (Refer to the <a 121 * href="http://www.unicode.org/reports/tr27/#notation"><i> 122 * definition</i></a> of the U+<i>n</i> notation in the Unicode 123 * Standard.) 124 * 125 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is 126 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 127 * <a id="supplementary">Characters</a> whose code points are greater 128 * than U+FFFF are called <em>supplementary character</em>s. The Java 129 * platform uses the UTF-16 representation in {@code char} arrays and 130 * in the {@code String} and {@code StringBuffer} classes. In 131 * this representation, supplementary characters are represented as a pair 132 * of {@code char} values, the first from the <em>high-surrogates</em> 133 * range, (\uD800-\uDBFF), the second from the 134 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 135 * 136 * <p>A {@code char} value, therefore, represents Basic 137 * Multilingual Plane (BMP) code points, including the surrogate 138 * code points, or code units of the UTF-16 encoding. An 139 * {@code int} value represents all Unicode code points, 140 * including supplementary code points. The lower (least significant) 141 * 21 bits of {@code int} are used to represent Unicode code 142 * points and the upper (most significant) 11 bits must be zero. 
143 * Unless otherwise specified, the behavior with respect to 144 * supplementary characters and surrogate {@code char} values is 145 * as follows: 146 * 147 * <ul> 148 * <li>The methods that only accept a {@code char} value cannot support 149 * supplementary characters. They treat {@code char} values from the 150 * surrogate ranges as undefined characters. For example, 151 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 152 * this specific value if followed by any low-surrogate value in a string 153 * would represent a letter. 154 * 155 * <li>The methods that accept an {@code int} value support all 156 * Unicode characters, including supplementary characters. For 157 * example, {@code Character.isLetter(0x2F81A)} returns 158 * {@code true} because the code point value represents a letter 159 * (a CJK ideograph). 160 * </ul> 161 * 162 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 163 * used for character values in the range between U+0000 and U+10FFFF, 164 * and <em>Unicode code unit</em> is used for 16-bit 165 * {@code char} values that are code units of the <em>UTF-16</em> 166 * encoding. For more information on Unicode terminology, refer to the 167 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 168 * 169 * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a> 170 * class; programmers should treat instances that are {@linkplain #equals(Object) equal} 171 * as interchangeable and should not use instances for synchronization, mutexes, or 172 * with {@linkplain java.lang.ref.Reference object references}. 173 * 174 * <div class="preview-block"> 175 * <div class="preview-comment"> 176 * When preview features are enabled, {@code Character} is a {@linkplain Class#isValue value class}. 
177 * Use of value class instances for synchronization, mutexes, or with 178 * {@linkplain java.lang.ref.Reference object references} result in 179 * {@link IdentityException}. 180 * </div> 181 * </div> 182 * 183 * @spec https://www.unicode.org/reports/tr27 Unicode 3.1.0 184 * @author Lee Boynton 185 * @author Guy Steele 186 * @author Akira Tanaka 187 * @author Martin Buchholz 188 * @author Ulf Zibis 189 * @since 1.0 190 */ 191 @jdk.internal.MigratedValueClass 192 @jdk.internal.ValueBased 193 public final class Character implements java.io.Serializable, Comparable<Character>, Constable { 194 /** 195 * The minimum radix available for conversion to and from strings. 196 * The constant value of this field is the smallest value permitted 197 * for the radix argument in radix-conversion methods such as the 198 * {@code digit} method, the {@code forDigit} method, and the 199 * {@code toString} method of class {@code Integer}. 200 * 201 * @see Character#digit(char, int) 202 * @see Character#forDigit(int, int) 203 * @see Integer#toString(int, int) 204 * @see Integer#valueOf(String) 205 */ 206 public static final int MIN_RADIX = 2; 207 208 /** 209 * The maximum radix available for conversion to and from strings. 210 * The constant value of this field is the largest value permitted 211 * for the radix argument in radix-conversion methods such as the 212 * {@code digit} method, the {@code forDigit} method, and the 213 * {@code toString} method of class {@code Integer}. 214 * 215 * @see Character#digit(char, int) 216 * @see Character#forDigit(int, int) 217 * @see Integer#toString(int, int) 218 * @see Integer#valueOf(String) 219 */ 220 public static final int MAX_RADIX = 36; 221 222 /** 223 * The constant value of this field is the smallest value of type 224 * {@code char}, {@code '\u005Cu0000'}. 
225 * 226 * @since 1.0.2 227 */ 228 public static final char MIN_VALUE = '\u0000'; 229 230 /** 231 * The constant value of this field is the largest value of type 232 * {@code char}, {@code '\u005CuFFFF'}. 233 * 234 * @since 1.0.2 235 */ 236 public static final char MAX_VALUE = '\uFFFF'; 237 238 /** 239 * The {@code Class} instance representing the primitive type 240 * {@code char}. 241 * 242 * @since 1.1 243 */ 244 public static final Class<Character> TYPE = Class.getPrimitiveClass("char"); 245 246 /* 247 * Normative general types 248 */ 249 250 /* 251 * General character types 252 */ 253 254 /** 255 * General category "Cn" in the Unicode specification. 256 * @since 1.1 257 */ 258 public static final byte UNASSIGNED = 0; 259 260 /** 261 * General category "Lu" in the Unicode specification. 262 * @since 1.1 263 */ 264 public static final byte UPPERCASE_LETTER = 1; 265 266 /** 267 * General category "Ll" in the Unicode specification. 268 * @since 1.1 269 */ 270 public static final byte LOWERCASE_LETTER = 2; 271 272 /** 273 * General category "Lt" in the Unicode specification. 274 * @since 1.1 275 */ 276 public static final byte TITLECASE_LETTER = 3; 277 278 /** 279 * General category "Lm" in the Unicode specification. 280 * @since 1.1 281 */ 282 public static final byte MODIFIER_LETTER = 4; 283 284 /** 285 * General category "Lo" in the Unicode specification. 286 * @since 1.1 287 */ 288 public static final byte OTHER_LETTER = 5; 289 290 /** 291 * General category "Mn" in the Unicode specification. 292 * @since 1.1 293 */ 294 public static final byte NON_SPACING_MARK = 6; 295 296 /** 297 * General category "Me" in the Unicode specification. 298 * @since 1.1 299 */ 300 public static final byte ENCLOSING_MARK = 7; 301 302 /** 303 * General category "Mc" in the Unicode specification. 304 * @since 1.1 305 */ 306 public static final byte COMBINING_SPACING_MARK = 8; 307 308 /** 309 * General category "Nd" in the Unicode specification. 
310 * @since 1.1 311 */ 312 public static final byte DECIMAL_DIGIT_NUMBER = 9; 313 314 /** 315 * General category "Nl" in the Unicode specification. 316 * @since 1.1 317 */ 318 public static final byte LETTER_NUMBER = 10; 319 320 /** 321 * General category "No" in the Unicode specification. 322 * @since 1.1 323 */ 324 public static final byte OTHER_NUMBER = 11; 325 326 /** 327 * General category "Zs" in the Unicode specification. 328 * @since 1.1 329 */ 330 public static final byte SPACE_SEPARATOR = 12; 331 332 /** 333 * General category "Zl" in the Unicode specification. 334 * @since 1.1 335 */ 336 public static final byte LINE_SEPARATOR = 13; 337 338 /** 339 * General category "Zp" in the Unicode specification. 340 * @since 1.1 341 */ 342 public static final byte PARAGRAPH_SEPARATOR = 14; 343 344 /** 345 * General category "Cc" in the Unicode specification. 346 * @since 1.1 347 */ 348 public static final byte CONTROL = 15; 349 350 /** 351 * General category "Cf" in the Unicode specification. 352 * @since 1.1 353 */ 354 public static final byte FORMAT = 16; 355 356 /** 357 * General category "Co" in the Unicode specification. 358 * @since 1.1 359 */ 360 public static final byte PRIVATE_USE = 18; 361 362 /** 363 * General category "Cs" in the Unicode specification. 364 * @since 1.1 365 */ 366 public static final byte SURROGATE = 19; 367 368 /** 369 * General category "Pd" in the Unicode specification. 370 * @since 1.1 371 */ 372 public static final byte DASH_PUNCTUATION = 20; 373 374 /** 375 * General category "Ps" in the Unicode specification. 376 * @since 1.1 377 */ 378 public static final byte START_PUNCTUATION = 21; 379 380 /** 381 * General category "Pe" in the Unicode specification. 382 * @since 1.1 383 */ 384 public static final byte END_PUNCTUATION = 22; 385 386 /** 387 * General category "Pc" in the Unicode specification. 
388 * @since 1.1 389 */ 390 public static final byte CONNECTOR_PUNCTUATION = 23; 391 392 /** 393 * General category "Po" in the Unicode specification. 394 * @since 1.1 395 */ 396 public static final byte OTHER_PUNCTUATION = 24; 397 398 /** 399 * General category "Sm" in the Unicode specification. 400 * @since 1.1 401 */ 402 public static final byte MATH_SYMBOL = 25; 403 404 /** 405 * General category "Sc" in the Unicode specification. 406 * @since 1.1 407 */ 408 public static final byte CURRENCY_SYMBOL = 26; 409 410 /** 411 * General category "Sk" in the Unicode specification. 412 * @since 1.1 413 */ 414 public static final byte MODIFIER_SYMBOL = 27; 415 416 /** 417 * General category "So" in the Unicode specification. 418 * @since 1.1 419 */ 420 public static final byte OTHER_SYMBOL = 28; 421 422 /** 423 * General category "Pi" in the Unicode specification. 424 * @since 1.4 425 */ 426 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 427 428 /** 429 * General category "Pf" in the Unicode specification. 430 * @since 1.4 431 */ 432 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 433 434 /** 435 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 436 */ 437 static final int ERROR = 0xFFFFFFFF; 438 439 440 /** 441 * Undefined bidirectional character type. Undefined {@code char} 442 * values have undefined directionality in the Unicode specification. 443 * @since 1.4 444 */ 445 public static final byte DIRECTIONALITY_UNDEFINED = -1; 446 447 /** 448 * Strong bidirectional character type "L" in the Unicode specification. 449 * @since 1.4 450 */ 451 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 452 453 /** 454 * Strong bidirectional character type "R" in the Unicode specification. 455 * @since 1.4 456 */ 457 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 458 459 /** 460 * Strong bidirectional character type "AL" in the Unicode specification. 
461 * @since 1.4 462 */ 463 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 464 465 /** 466 * Weak bidirectional character type "EN" in the Unicode specification. 467 * @since 1.4 468 */ 469 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 470 471 /** 472 * Weak bidirectional character type "ES" in the Unicode specification. 473 * @since 1.4 474 */ 475 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 476 477 /** 478 * Weak bidirectional character type "ET" in the Unicode specification. 479 * @since 1.4 480 */ 481 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 482 483 /** 484 * Weak bidirectional character type "AN" in the Unicode specification. 485 * @since 1.4 486 */ 487 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 488 489 /** 490 * Weak bidirectional character type "CS" in the Unicode specification. 491 * @since 1.4 492 */ 493 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 494 495 /** 496 * Weak bidirectional character type "NSM" in the Unicode specification. 497 * @since 1.4 498 */ 499 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 500 501 /** 502 * Weak bidirectional character type "BN" in the Unicode specification. 503 * @since 1.4 504 */ 505 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 506 507 /** 508 * Neutral bidirectional character type "B" in the Unicode specification. 509 * @since 1.4 510 */ 511 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 512 513 /** 514 * Neutral bidirectional character type "S" in the Unicode specification. 515 * @since 1.4 516 */ 517 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 518 519 /** 520 * Neutral bidirectional character type "WS" in the Unicode specification. 521 * @since 1.4 522 */ 523 public static final byte DIRECTIONALITY_WHITESPACE = 12; 524 525 /** 526 * Neutral bidirectional character type "ON" in the Unicode specification. 
527 * @since 1.4 528 */ 529 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 530 531 /** 532 * Strong bidirectional character type "LRE" in the Unicode specification. 533 * @since 1.4 534 */ 535 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 536 537 /** 538 * Strong bidirectional character type "LRO" in the Unicode specification. 539 * @since 1.4 540 */ 541 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 542 543 /** 544 * Strong bidirectional character type "RLE" in the Unicode specification. 545 * @since 1.4 546 */ 547 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 548 549 /** 550 * Strong bidirectional character type "RLO" in the Unicode specification. 551 * @since 1.4 552 */ 553 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 554 555 /** 556 * Weak bidirectional character type "PDF" in the Unicode specification. 557 * @since 1.4 558 */ 559 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 560 561 /** 562 * Weak bidirectional character type "LRI" in the Unicode specification. 563 * @since 9 564 */ 565 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19; 566 567 /** 568 * Weak bidirectional character type "RLI" in the Unicode specification. 569 * @since 9 570 */ 571 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20; 572 573 /** 574 * Weak bidirectional character type "FSI" in the Unicode specification. 575 * @since 9 576 */ 577 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21; 578 579 /** 580 * Weak bidirectional character type "PDI" in the Unicode specification. 581 * @since 9 582 */ 583 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22; 584 585 /** 586 * The minimum value of a 587 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 588 * Unicode high-surrogate code unit</a> 589 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 
590 * A high-surrogate is also known as a <i>leading-surrogate</i>. 591 * 592 * @since 1.5 593 */ 594 public static final char MIN_HIGH_SURROGATE = '\uD800'; 595 596 /** 597 * The maximum value of a 598 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 599 * Unicode high-surrogate code unit</a> 600 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 601 * A high-surrogate is also known as a <i>leading-surrogate</i>. 602 * 603 * @since 1.5 604 */ 605 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 606 607 /** 608 * The minimum value of a 609 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 610 * Unicode low-surrogate code unit</a> 611 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 612 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 613 * 614 * @since 1.5 615 */ 616 public static final char MIN_LOW_SURROGATE = '\uDC00'; 617 618 /** 619 * The maximum value of a 620 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 621 * Unicode low-surrogate code unit</a> 622 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 623 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 624 * 625 * @since 1.5 626 */ 627 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 628 629 /** 630 * The minimum value of a Unicode surrogate code unit in the 631 * UTF-16 encoding, constant {@code '\u005CuD800'}. 632 * 633 * @since 1.5 634 */ 635 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 636 637 /** 638 * The maximum value of a Unicode surrogate code unit in the 639 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 640 * 641 * @since 1.5 642 */ 643 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 644 645 /** 646 * The minimum value of a 647 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 648 * Unicode supplementary code point</a>, constant {@code U+10000}. 
649 * 650 * @since 1.5 651 */ 652 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 653 654 /** 655 * The minimum value of a 656 * <a href="http://www.unicode.org/glossary/#code_point"> 657 * Unicode code point</a>, constant {@code U+0000}. 658 * 659 * @since 1.5 660 */ 661 public static final int MIN_CODE_POINT = 0x000000; 662 663 /** 664 * The maximum value of a 665 * <a href="http://www.unicode.org/glossary/#code_point"> 666 * Unicode code point</a>, constant {@code U+10FFFF}. 667 * 668 * @since 1.5 669 */ 670 public static final int MAX_CODE_POINT = 0X10FFFF; 671 672 /** 673 * Returns an {@link Optional} containing the nominal descriptor for this 674 * instance. 675 * 676 * @return an {@link Optional} describing the {@linkplain Character} instance 677 * @since 15 678 */ 679 @Override 680 public Optional<DynamicConstantDesc<Character>> describeConstable() { 681 return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value)); 682 } 683 684 /** 685 * Instances of this class represent particular subsets of the Unicode 686 * character set. The only family of subsets defined in the 687 * {@code Character} class is {@link Character.UnicodeBlock}. 688 * Other portions of the Java API may define other subsets for their 689 * own purposes. 690 * 691 * @since 1.2 692 */ 693 public static class Subset { 694 695 private String name; 696 697 /** 698 * Constructs a new {@code Subset} instance. 699 * 700 * @param name The name of this subset 701 * @throws NullPointerException if name is {@code null} 702 */ 703 protected Subset(String name) { 704 if (name == null) { 705 throw new NullPointerException("name"); 706 } 707 this.name = name; 708 } 709 710 /** 711 * Compares two {@code Subset} objects for equality. 712 * This method returns {@code true} if and only if 713 * {@code this} and the argument refer to the same 714 * object; since this method is {@code final}, this 715 * guarantee holds for all subclasses. 
716 */ 717 public final boolean equals(Object obj) { 718 return (this == obj); 719 } 720 721 /** 722 * Returns the standard hash code as defined by the 723 * {@link Object#hashCode} method. This method 724 * is {@code final} in order to ensure that the 725 * {@code equals} and {@code hashCode} methods will 726 * be consistent in all subclasses. 727 */ 728 public final int hashCode() { 729 return super.hashCode(); 730 } 731 732 /** 733 * Returns the name of this subset. 734 */ 735 public final String toString() { 736 return name; 737 } 738 } 739 740 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 741 // for the latest specification of Unicode Blocks. 742 743 /** 744 * A family of character subsets representing the character blocks in the 745 * Unicode specification. Character blocks generally define characters 746 * used for a specific script or purpose. A character is contained by 747 * at most one Unicode block. 748 * 749 * @since 1.2 750 */ 751 public static final class UnicodeBlock extends Subset { 752 /** 753 * NUM_ENTITIES should match the total number of UnicodeBlocks. 754 * It should be adjusted whenever the Unicode Character Database 755 * is upgraded. 756 */ 757 private static final int NUM_ENTITIES = 759; 758 private static Map<String, UnicodeBlock> map = HashMap.newHashMap(NUM_ENTITIES); 759 760 /** 761 * Creates a UnicodeBlock with the given identifier name. 762 * This name must be the same as the block identifier. 763 */ 764 private UnicodeBlock(String idName) { 765 super(idName); 766 map.put(idName, this); 767 } 768 769 /** 770 * Creates a UnicodeBlock with the given identifier name and 771 * alias name. 772 */ 773 private UnicodeBlock(String idName, String alias) { 774 this(idName); 775 map.put(alias, this); 776 } 777 778 /** 779 * Creates a UnicodeBlock with the given identifier name and 780 * alias names. 781 */ 782 private UnicodeBlock(String idName, String... 
aliases) { 783 this(idName); 784 for (String alias : aliases) 785 map.put(alias, this); 786 } 787 788 /** 789 * Constant for the "Basic Latin" Unicode character block. 790 * @since 1.2 791 */ 792 public static final UnicodeBlock BASIC_LATIN = 793 new UnicodeBlock("BASIC_LATIN", 794 "BASIC LATIN", 795 "BASICLATIN"); 796 797 /** 798 * Constant for the "Latin-1 Supplement" Unicode character block. 799 * @since 1.2 800 */ 801 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 802 new UnicodeBlock("LATIN_1_SUPPLEMENT", 803 "LATIN-1 SUPPLEMENT", 804 "LATIN-1SUPPLEMENT"); 805 806 /** 807 * Constant for the "Latin Extended-A" Unicode character block. 808 * @since 1.2 809 */ 810 public static final UnicodeBlock LATIN_EXTENDED_A = 811 new UnicodeBlock("LATIN_EXTENDED_A", 812 "LATIN EXTENDED-A", 813 "LATINEXTENDED-A"); 814 815 /** 816 * Constant for the "Latin Extended-B" Unicode character block. 817 * @since 1.2 818 */ 819 public static final UnicodeBlock LATIN_EXTENDED_B = 820 new UnicodeBlock("LATIN_EXTENDED_B", 821 "LATIN EXTENDED-B", 822 "LATINEXTENDED-B"); 823 824 /** 825 * Constant for the "IPA Extensions" Unicode character block. 826 * @since 1.2 827 */ 828 public static final UnicodeBlock IPA_EXTENSIONS = 829 new UnicodeBlock("IPA_EXTENSIONS", 830 "IPA EXTENSIONS", 831 "IPAEXTENSIONS"); 832 833 /** 834 * Constant for the "Spacing Modifier Letters" Unicode character block. 835 * @since 1.2 836 */ 837 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 838 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 839 "SPACING MODIFIER LETTERS", 840 "SPACINGMODIFIERLETTERS"); 841 842 /** 843 * Constant for the "Combining Diacritical Marks" Unicode character block. 844 * @since 1.2 845 */ 846 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 847 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 848 "COMBINING DIACRITICAL MARKS", 849 "COMBININGDIACRITICALMARKS"); 850 851 /** 852 * Constant for the "Greek and Coptic" Unicode character block. 
853 * <p> 854 * This block was previously known as the "Greek" block. 855 * 856 * @since 1.2 857 */ 858 public static final UnicodeBlock GREEK = 859 new UnicodeBlock("GREEK", 860 "GREEK AND COPTIC", 861 "GREEKANDCOPTIC"); 862 863 /** 864 * Constant for the "Cyrillic" Unicode character block. 865 * @since 1.2 866 */ 867 public static final UnicodeBlock CYRILLIC = 868 new UnicodeBlock("CYRILLIC"); 869 870 /** 871 * Constant for the "Armenian" Unicode character block. 872 * @since 1.2 873 */ 874 public static final UnicodeBlock ARMENIAN = 875 new UnicodeBlock("ARMENIAN"); 876 877 /** 878 * Constant for the "Hebrew" Unicode character block. 879 * @since 1.2 880 */ 881 public static final UnicodeBlock HEBREW = 882 new UnicodeBlock("HEBREW"); 883 884 /** 885 * Constant for the "Arabic" Unicode character block. 886 * @since 1.2 887 */ 888 public static final UnicodeBlock ARABIC = 889 new UnicodeBlock("ARABIC"); 890 891 /** 892 * Constant for the "Devanagari" Unicode character block. 893 * @since 1.2 894 */ 895 public static final UnicodeBlock DEVANAGARI = 896 new UnicodeBlock("DEVANAGARI"); 897 898 /** 899 * Constant for the "Bengali" Unicode character block. 900 * @since 1.2 901 */ 902 public static final UnicodeBlock BENGALI = 903 new UnicodeBlock("BENGALI"); 904 905 /** 906 * Constant for the "Gurmukhi" Unicode character block. 907 * @since 1.2 908 */ 909 public static final UnicodeBlock GURMUKHI = 910 new UnicodeBlock("GURMUKHI"); 911 912 /** 913 * Constant for the "Gujarati" Unicode character block. 914 * @since 1.2 915 */ 916 public static final UnicodeBlock GUJARATI = 917 new UnicodeBlock("GUJARATI"); 918 919 /** 920 * Constant for the "Oriya" Unicode character block. 921 * @since 1.2 922 */ 923 public static final UnicodeBlock ORIYA = 924 new UnicodeBlock("ORIYA"); 925 926 /** 927 * Constant for the "Tamil" Unicode character block. 
928 * @since 1.2 929 */ 930 public static final UnicodeBlock TAMIL = 931 new UnicodeBlock("TAMIL"); 932 933 /** 934 * Constant for the "Telugu" Unicode character block. 935 * @since 1.2 936 */ 937 public static final UnicodeBlock TELUGU = 938 new UnicodeBlock("TELUGU"); 939 940 /** 941 * Constant for the "Kannada" Unicode character block. 942 * @since 1.2 943 */ 944 public static final UnicodeBlock KANNADA = 945 new UnicodeBlock("KANNADA"); 946 947 /** 948 * Constant for the "Malayalam" Unicode character block. 949 * @since 1.2 950 */ 951 public static final UnicodeBlock MALAYALAM = 952 new UnicodeBlock("MALAYALAM"); 953 954 /** 955 * Constant for the "Thai" Unicode character block. 956 * @since 1.2 957 */ 958 public static final UnicodeBlock THAI = 959 new UnicodeBlock("THAI"); 960 961 /** 962 * Constant for the "Lao" Unicode character block. 963 * @since 1.2 964 */ 965 public static final UnicodeBlock LAO = 966 new UnicodeBlock("LAO"); 967 968 /** 969 * Constant for the "Tibetan" Unicode character block. 970 * @since 1.2 971 */ 972 public static final UnicodeBlock TIBETAN = 973 new UnicodeBlock("TIBETAN"); 974 975 /** 976 * Constant for the "Georgian" Unicode character block. 977 * @since 1.2 978 */ 979 public static final UnicodeBlock GEORGIAN = 980 new UnicodeBlock("GEORGIAN"); 981 982 /** 983 * Constant for the "Hangul Jamo" Unicode character block. 984 * @since 1.2 985 */ 986 public static final UnicodeBlock HANGUL_JAMO = 987 new UnicodeBlock("HANGUL_JAMO", 988 "HANGUL JAMO", 989 "HANGULJAMO"); 990 991 /** 992 * Constant for the "Latin Extended Additional" Unicode character block. 993 * @since 1.2 994 */ 995 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 996 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 997 "LATIN EXTENDED ADDITIONAL", 998 "LATINEXTENDEDADDITIONAL"); 999 1000 /** 1001 * Constant for the "Greek Extended" Unicode character block. 
1002 * @since 1.2 1003 */ 1004 public static final UnicodeBlock GREEK_EXTENDED = 1005 new UnicodeBlock("GREEK_EXTENDED", 1006 "GREEK EXTENDED", 1007 "GREEKEXTENDED"); 1008 1009 /** 1010 * Constant for the "General Punctuation" Unicode character block. 1011 * @since 1.2 1012 */ 1013 public static final UnicodeBlock GENERAL_PUNCTUATION = 1014 new UnicodeBlock("GENERAL_PUNCTUATION", 1015 "GENERAL PUNCTUATION", 1016 "GENERALPUNCTUATION"); 1017 1018 /** 1019 * Constant for the "Superscripts and Subscripts" Unicode character 1020 * block. 1021 * @since 1.2 1022 */ 1023 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 1024 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 1025 "SUPERSCRIPTS AND SUBSCRIPTS", 1026 "SUPERSCRIPTSANDSUBSCRIPTS"); 1027 1028 /** 1029 * Constant for the "Currency Symbols" Unicode character block. 1030 * @since 1.2 1031 */ 1032 public static final UnicodeBlock CURRENCY_SYMBOLS = 1033 new UnicodeBlock("CURRENCY_SYMBOLS", 1034 "CURRENCY SYMBOLS", 1035 "CURRENCYSYMBOLS"); 1036 1037 /** 1038 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 1039 * character block. 1040 * <p> 1041 * This block was previously known as "Combining Marks for Symbols". 1042 * @since 1.2 1043 */ 1044 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 1045 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 1046 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 1047 "COMBININGDIACRITICALMARKSFORSYMBOLS", 1048 "COMBINING MARKS FOR SYMBOLS", 1049 "COMBININGMARKSFORSYMBOLS"); 1050 1051 /** 1052 * Constant for the "Letterlike Symbols" Unicode character block. 1053 * @since 1.2 1054 */ 1055 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 1056 new UnicodeBlock("LETTERLIKE_SYMBOLS", 1057 "LETTERLIKE SYMBOLS", 1058 "LETTERLIKESYMBOLS"); 1059 1060 /** 1061 * Constant for the "Number Forms" Unicode character block. 
1062 * @since 1.2 1063 */ 1064 public static final UnicodeBlock NUMBER_FORMS = 1065 new UnicodeBlock("NUMBER_FORMS", 1066 "NUMBER FORMS", 1067 "NUMBERFORMS"); 1068 1069 /** 1070 * Constant for the "Arrows" Unicode character block. 1071 * @since 1.2 1072 */ 1073 public static final UnicodeBlock ARROWS = 1074 new UnicodeBlock("ARROWS"); 1075 1076 /** 1077 * Constant for the "Mathematical Operators" Unicode character block. 1078 * @since 1.2 1079 */ 1080 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 1081 new UnicodeBlock("MATHEMATICAL_OPERATORS", 1082 "MATHEMATICAL OPERATORS", 1083 "MATHEMATICALOPERATORS"); 1084 1085 /** 1086 * Constant for the "Miscellaneous Technical" Unicode character block. 1087 * @since 1.2 1088 */ 1089 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 1090 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 1091 "MISCELLANEOUS TECHNICAL", 1092 "MISCELLANEOUSTECHNICAL"); 1093 1094 /** 1095 * Constant for the "Control Pictures" Unicode character block. 1096 * @since 1.2 1097 */ 1098 public static final UnicodeBlock CONTROL_PICTURES = 1099 new UnicodeBlock("CONTROL_PICTURES", 1100 "CONTROL PICTURES", 1101 "CONTROLPICTURES"); 1102 1103 /** 1104 * Constant for the "Optical Character Recognition" Unicode character block. 1105 * @since 1.2 1106 */ 1107 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1108 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1109 "OPTICAL CHARACTER RECOGNITION", 1110 "OPTICALCHARACTERRECOGNITION"); 1111 1112 /** 1113 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1114 * @since 1.2 1115 */ 1116 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1117 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1118 "ENCLOSED ALPHANUMERICS", 1119 "ENCLOSEDALPHANUMERICS"); 1120 1121 /** 1122 * Constant for the "Box Drawing" Unicode character block. 
1123 * @since 1.2 1124 */ 1125 public static final UnicodeBlock BOX_DRAWING = 1126 new UnicodeBlock("BOX_DRAWING", 1127 "BOX DRAWING", 1128 "BOXDRAWING"); 1129 1130 /** 1131 * Constant for the "Block Elements" Unicode character block. 1132 * @since 1.2 1133 */ 1134 public static final UnicodeBlock BLOCK_ELEMENTS = 1135 new UnicodeBlock("BLOCK_ELEMENTS", 1136 "BLOCK ELEMENTS", 1137 "BLOCKELEMENTS"); 1138 1139 /** 1140 * Constant for the "Geometric Shapes" Unicode character block. 1141 * @since 1.2 1142 */ 1143 public static final UnicodeBlock GEOMETRIC_SHAPES = 1144 new UnicodeBlock("GEOMETRIC_SHAPES", 1145 "GEOMETRIC SHAPES", 1146 "GEOMETRICSHAPES"); 1147 1148 /** 1149 * Constant for the "Miscellaneous Symbols" Unicode character block. 1150 * @since 1.2 1151 */ 1152 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1153 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1154 "MISCELLANEOUS SYMBOLS", 1155 "MISCELLANEOUSSYMBOLS"); 1156 1157 /** 1158 * Constant for the "Dingbats" Unicode character block. 1159 * @since 1.2 1160 */ 1161 public static final UnicodeBlock DINGBATS = 1162 new UnicodeBlock("DINGBATS"); 1163 1164 /** 1165 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 1166 * @since 1.2 1167 */ 1168 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1169 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1170 "CJK SYMBOLS AND PUNCTUATION", 1171 "CJKSYMBOLSANDPUNCTUATION"); 1172 1173 /** 1174 * Constant for the "Hiragana" Unicode character block. 1175 * @since 1.2 1176 */ 1177 public static final UnicodeBlock HIRAGANA = 1178 new UnicodeBlock("HIRAGANA"); 1179 1180 /** 1181 * Constant for the "Katakana" Unicode character block. 1182 * @since 1.2 1183 */ 1184 public static final UnicodeBlock KATAKANA = 1185 new UnicodeBlock("KATAKANA"); 1186 1187 /** 1188 * Constant for the "Bopomofo" Unicode character block. 
1189 * @since 1.2 1190 */ 1191 public static final UnicodeBlock BOPOMOFO = 1192 new UnicodeBlock("BOPOMOFO"); 1193 1194 /** 1195 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1196 * @since 1.2 1197 */ 1198 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1199 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1200 "HANGUL COMPATIBILITY JAMO", 1201 "HANGULCOMPATIBILITYJAMO"); 1202 1203 /** 1204 * Constant for the "Kanbun" Unicode character block. 1205 * @since 1.2 1206 */ 1207 public static final UnicodeBlock KANBUN = 1208 new UnicodeBlock("KANBUN"); 1209 1210 /** 1211 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 1212 * @since 1.2 1213 */ 1214 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1215 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1216 "ENCLOSED CJK LETTERS AND MONTHS", 1217 "ENCLOSEDCJKLETTERSANDMONTHS"); 1218 1219 /** 1220 * Constant for the "CJK Compatibility" Unicode character block. 1221 * @since 1.2 1222 */ 1223 public static final UnicodeBlock CJK_COMPATIBILITY = 1224 new UnicodeBlock("CJK_COMPATIBILITY", 1225 "CJK COMPATIBILITY", 1226 "CJKCOMPATIBILITY"); 1227 1228 /** 1229 * Constant for the "CJK Unified Ideographs" Unicode character block. 1230 * @since 1.2 1231 */ 1232 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1233 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1234 "CJK UNIFIED IDEOGRAPHS", 1235 "CJKUNIFIEDIDEOGRAPHS"); 1236 1237 /** 1238 * Constant for the "Hangul Syllables" Unicode character block. 1239 * @since 1.2 1240 */ 1241 public static final UnicodeBlock HANGUL_SYLLABLES = 1242 new UnicodeBlock("HANGUL_SYLLABLES", 1243 "HANGUL SYLLABLES", 1244 "HANGULSYLLABLES"); 1245 1246 /** 1247 * Constant for the "Private Use Area" Unicode character block. 
1248 * @since 1.2 1249 */ 1250 public static final UnicodeBlock PRIVATE_USE_AREA = 1251 new UnicodeBlock("PRIVATE_USE_AREA", 1252 "PRIVATE USE AREA", 1253 "PRIVATEUSEAREA"); 1254 1255 /** 1256 * Constant for the "CJK Compatibility Ideographs" Unicode character 1257 * block. 1258 * @since 1.2 1259 */ 1260 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1261 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1262 "CJK COMPATIBILITY IDEOGRAPHS", 1263 "CJKCOMPATIBILITYIDEOGRAPHS"); 1264 1265 /** 1266 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 1267 * @since 1.2 1268 */ 1269 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1270 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1271 "ALPHABETIC PRESENTATION FORMS", 1272 "ALPHABETICPRESENTATIONFORMS"); 1273 1274 /** 1275 * Constant for the "Arabic Presentation Forms-A" Unicode character 1276 * block. 1277 * @since 1.2 1278 */ 1279 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1280 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1281 "ARABIC PRESENTATION FORMS-A", 1282 "ARABICPRESENTATIONFORMS-A"); 1283 1284 /** 1285 * Constant for the "Combining Half Marks" Unicode character block. 1286 * @since 1.2 1287 */ 1288 public static final UnicodeBlock COMBINING_HALF_MARKS = 1289 new UnicodeBlock("COMBINING_HALF_MARKS", 1290 "COMBINING HALF MARKS", 1291 "COMBININGHALFMARKS"); 1292 1293 /** 1294 * Constant for the "CJK Compatibility Forms" Unicode character block. 1295 * @since 1.2 1296 */ 1297 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1298 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1299 "CJK COMPATIBILITY FORMS", 1300 "CJKCOMPATIBILITYFORMS"); 1301 1302 /** 1303 * Constant for the "Small Form Variants" Unicode character block. 
1304 * @since 1.2 1305 */ 1306 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1307 new UnicodeBlock("SMALL_FORM_VARIANTS", 1308 "SMALL FORM VARIANTS", 1309 "SMALLFORMVARIANTS"); 1310 1311 /** 1312 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1313 * @since 1.2 1314 */ 1315 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1316 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1317 "ARABIC PRESENTATION FORMS-B", 1318 "ARABICPRESENTATIONFORMS-B"); 1319 1320 /** 1321 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1322 * block. 1323 * @since 1.2 1324 */ 1325 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1326 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1327 "HALFWIDTH AND FULLWIDTH FORMS", 1328 "HALFWIDTHANDFULLWIDTHFORMS"); 1329 1330 /** 1331 * Constant for the "Specials" Unicode character block. 1332 * @since 1.2 1333 */ 1334 public static final UnicodeBlock SPECIALS = 1335 new UnicodeBlock("SPECIALS"); 1336 1337 /** Legacy constant for the "Surrogates Area" character block, superseded by the three standard surrogate block constants named below. 1338 * @deprecated 1339 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES}, 1340 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}. 1341 * These constants match the block definitions of the Unicode Standard. 1342 * The {@link #of(char)} and {@link #of(int)} methods return the 1343 * standard constants. 1344 */ 1345 @Deprecated(since="1.5") 1346 public static final UnicodeBlock SURROGATES_AREA = 1347 new UnicodeBlock("SURROGATES_AREA"); 1348 1349 /** 1350 * Constant for the "Syriac" Unicode character block. 1351 * @since 1.4 1352 */ 1353 public static final UnicodeBlock SYRIAC = 1354 new UnicodeBlock("SYRIAC"); 1355 1356 /** 1357 * Constant for the "Thaana" Unicode character block. 1358 * @since 1.4 1359 */ 1360 public static final UnicodeBlock THAANA = 1361 new UnicodeBlock("THAANA"); 1362 1363 /** 1364 * Constant for the "Sinhala" Unicode character block.
1365 * @since 1.4 1366 */ 1367 public static final UnicodeBlock SINHALA = 1368 new UnicodeBlock("SINHALA"); 1369 1370 /** 1371 * Constant for the "Myanmar" Unicode character block. 1372 * @since 1.4 1373 */ 1374 public static final UnicodeBlock MYANMAR = 1375 new UnicodeBlock("MYANMAR"); 1376 1377 /** 1378 * Constant for the "Ethiopic" Unicode character block. 1379 * @since 1.4 1380 */ 1381 public static final UnicodeBlock ETHIOPIC = 1382 new UnicodeBlock("ETHIOPIC"); 1383 1384 /** 1385 * Constant for the "Cherokee" Unicode character block. 1386 * @since 1.4 1387 */ 1388 public static final UnicodeBlock CHEROKEE = 1389 new UnicodeBlock("CHEROKEE"); 1390 1391 /** 1392 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1393 * @since 1.4 1394 */ 1395 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1396 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1397 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1398 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1399 1400 /** 1401 * Constant for the "Ogham" Unicode character block. 1402 * @since 1.4 1403 */ 1404 public static final UnicodeBlock OGHAM = 1405 new UnicodeBlock("OGHAM"); 1406 1407 /** 1408 * Constant for the "Runic" Unicode character block. 1409 * @since 1.4 1410 */ 1411 public static final UnicodeBlock RUNIC = 1412 new UnicodeBlock("RUNIC"); 1413 1414 /** 1415 * Constant for the "Khmer" Unicode character block. 1416 * @since 1.4 1417 */ 1418 public static final UnicodeBlock KHMER = 1419 new UnicodeBlock("KHMER"); 1420 1421 /** 1422 * Constant for the "Mongolian" Unicode character block. 1423 * @since 1.4 1424 */ 1425 public static final UnicodeBlock MONGOLIAN = 1426 new UnicodeBlock("MONGOLIAN"); 1427 1428 /** 1429 * Constant for the "Braille Patterns" Unicode character block. 
1430 * @since 1.4 1431 */ 1432 public static final UnicodeBlock BRAILLE_PATTERNS = 1433 new UnicodeBlock("BRAILLE_PATTERNS", 1434 "BRAILLE PATTERNS", 1435 "BRAILLEPATTERNS"); 1436 1437 /** 1438 * Constant for the "CJK Radicals Supplement" Unicode character block. 1439 * @since 1.4 1440 */ 1441 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1442 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1443 "CJK RADICALS SUPPLEMENT", 1444 "CJKRADICALSSUPPLEMENT"); 1445 1446 /** 1447 * Constant for the "Kangxi Radicals" Unicode character block. 1448 * @since 1.4 1449 */ 1450 public static final UnicodeBlock KANGXI_RADICALS = 1451 new UnicodeBlock("KANGXI_RADICALS", 1452 "KANGXI RADICALS", 1453 "KANGXIRADICALS"); 1454 1455 /** 1456 * Constant for the "Ideographic Description Characters" Unicode character block. 1457 * @since 1.4 1458 */ 1459 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1460 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1461 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1462 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1463 1464 /** 1465 * Constant for the "Bopomofo Extended" Unicode character block. 1466 * @since 1.4 1467 */ 1468 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1469 new UnicodeBlock("BOPOMOFO_EXTENDED", 1470 "BOPOMOFO EXTENDED", 1471 "BOPOMOFOEXTENDED"); 1472 1473 /** 1474 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 1475 * @since 1.4 1476 */ 1477 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1478 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1479 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1480 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1481 1482 /** 1483 * Constant for the "Yi Syllables" Unicode character block. 1484 * @since 1.4 1485 */ 1486 public static final UnicodeBlock YI_SYLLABLES = 1487 new UnicodeBlock("YI_SYLLABLES", 1488 "YI SYLLABLES", 1489 "YISYLLABLES"); 1490 1491 /** 1492 * Constant for the "Yi Radicals" Unicode character block. 
1493 * @since 1.4 1494 */ 1495 public static final UnicodeBlock YI_RADICALS = 1496 new UnicodeBlock("YI_RADICALS", 1497 "YI RADICALS", 1498 "YIRADICALS"); 1499 1500 /** 1501 * Constant for the "Cyrillic Supplement" Unicode character block. 1502 * <p>This block was previously known as the "Cyrillic Supplementary" block; the name of this constant reflects that earlier block name. 1503 * @since 1.5 1504 */ 1505 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1506 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1507 "CYRILLIC SUPPLEMENTARY", 1508 "CYRILLICSUPPLEMENTARY", 1509 "CYRILLIC SUPPLEMENT", 1510 "CYRILLICSUPPLEMENT"); 1511 1512 /** 1513 * Constant for the "Tagalog" Unicode character block. 1514 * @since 1.5 1515 */ 1516 public static final UnicodeBlock TAGALOG = 1517 new UnicodeBlock("TAGALOG"); 1518 1519 /** 1520 * Constant for the "Hanunoo" Unicode character block. 1521 * @since 1.5 1522 */ 1523 public static final UnicodeBlock HANUNOO = 1524 new UnicodeBlock("HANUNOO"); 1525 1526 /** 1527 * Constant for the "Buhid" Unicode character block. 1528 * @since 1.5 1529 */ 1530 public static final UnicodeBlock BUHID = 1531 new UnicodeBlock("BUHID"); 1532 1533 /** 1534 * Constant for the "Tagbanwa" Unicode character block. 1535 * @since 1.5 1536 */ 1537 public static final UnicodeBlock TAGBANWA = 1538 new UnicodeBlock("TAGBANWA"); 1539 1540 /** 1541 * Constant for the "Limbu" Unicode character block. 1542 * @since 1.5 1543 */ 1544 public static final UnicodeBlock LIMBU = 1545 new UnicodeBlock("LIMBU"); 1546 1547 /** 1548 * Constant for the "Tai Le" Unicode character block. 1549 * @since 1.5 1550 */ 1551 public static final UnicodeBlock TAI_LE = 1552 new UnicodeBlock("TAI_LE", 1553 "TAI LE", 1554 "TAILE"); 1555 1556 /** 1557 * Constant for the "Khmer Symbols" Unicode character block.
1558 * @since 1.5 1559 */ 1560 public static final UnicodeBlock KHMER_SYMBOLS = 1561 new UnicodeBlock("KHMER_SYMBOLS", 1562 "KHMER SYMBOLS", 1563 "KHMERSYMBOLS"); 1564 1565 /** 1566 * Constant for the "Phonetic Extensions" Unicode character block. 1567 * @since 1.5 1568 */ 1569 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1570 new UnicodeBlock("PHONETIC_EXTENSIONS", 1571 "PHONETIC EXTENSIONS", 1572 "PHONETICEXTENSIONS"); 1573 1574 /** 1575 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 1576 * @since 1.5 1577 */ 1578 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1579 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1580 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1581 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1582 1583 /** 1584 * Constant for the "Supplemental Arrows-A" Unicode character block. 1585 * @since 1.5 1586 */ 1587 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1588 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1589 "SUPPLEMENTAL ARROWS-A", 1590 "SUPPLEMENTALARROWS-A"); 1591 1592 /** 1593 * Constant for the "Supplemental Arrows-B" Unicode character block. 1594 * @since 1.5 1595 */ 1596 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1597 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1598 "SUPPLEMENTAL ARROWS-B", 1599 "SUPPLEMENTALARROWS-B"); 1600 1601 /** 1602 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1603 * character block. 1604 * @since 1.5 1605 */ 1606 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1607 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1608 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1609 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1610 1611 /** 1612 * Constant for the "Supplemental Mathematical Operators" Unicode 1613 * character block. 
1614 * @since 1.5 1615 */ 1616 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1617 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1618 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1619 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1620 1621 /** 1622 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1623 * block. 1624 * @since 1.5 1625 */ 1626 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1627 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1628 "MISCELLANEOUS SYMBOLS AND ARROWS", 1629 "MISCELLANEOUSSYMBOLSANDARROWS"); 1630 1631 /** 1632 * Constant for the "Katakana Phonetic Extensions" Unicode character 1633 * block. 1634 * @since 1.5 1635 */ 1636 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1637 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1638 "KATAKANA PHONETIC EXTENSIONS", 1639 "KATAKANAPHONETICEXTENSIONS"); 1640 1641 /** 1642 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1643 * @since 1.5 1644 */ 1645 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1646 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1647 "YIJING HEXAGRAM SYMBOLS", 1648 "YIJINGHEXAGRAMSYMBOLS"); 1649 1650 /** 1651 * Constant for the "Variation Selectors" Unicode character block. 1652 * @since 1.5 1653 */ 1654 public static final UnicodeBlock VARIATION_SELECTORS = 1655 new UnicodeBlock("VARIATION_SELECTORS", 1656 "VARIATION SELECTORS", 1657 "VARIATIONSELECTORS"); 1658 1659 /** 1660 * Constant for the "Linear B Syllabary" Unicode character block. 1661 * @since 1.5 1662 */ 1663 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1664 new UnicodeBlock("LINEAR_B_SYLLABARY", 1665 "LINEAR B SYLLABARY", 1666 "LINEARBSYLLABARY"); 1667 1668 /** 1669 * Constant for the "Linear B Ideograms" Unicode character block. 
1670 * @since 1.5 1671 */ 1672 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1673 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1674 "LINEAR B IDEOGRAMS", 1675 "LINEARBIDEOGRAMS"); 1676 1677 /** 1678 * Constant for the "Aegean Numbers" Unicode character block. 1679 * @since 1.5 1680 */ 1681 public static final UnicodeBlock AEGEAN_NUMBERS = 1682 new UnicodeBlock("AEGEAN_NUMBERS", 1683 "AEGEAN NUMBERS", 1684 "AEGEANNUMBERS"); 1685 1686 /** 1687 * Constant for the "Old Italic" Unicode character block. 1688 * @since 1.5 1689 */ 1690 public static final UnicodeBlock OLD_ITALIC = 1691 new UnicodeBlock("OLD_ITALIC", 1692 "OLD ITALIC", 1693 "OLDITALIC"); 1694 1695 /** 1696 * Constant for the "Gothic" Unicode character block. 1697 * @since 1.5 1698 */ 1699 public static final UnicodeBlock GOTHIC = 1700 new UnicodeBlock("GOTHIC"); 1701 1702 /** 1703 * Constant for the "Ugaritic" Unicode character block. 1704 * @since 1.5 1705 */ 1706 public static final UnicodeBlock UGARITIC = 1707 new UnicodeBlock("UGARITIC"); 1708 1709 /** 1710 * Constant for the "Deseret" Unicode character block. 1711 * @since 1.5 1712 */ 1713 public static final UnicodeBlock DESERET = 1714 new UnicodeBlock("DESERET"); 1715 1716 /** 1717 * Constant for the "Shavian" Unicode character block. 1718 * @since 1.5 1719 */ 1720 public static final UnicodeBlock SHAVIAN = 1721 new UnicodeBlock("SHAVIAN"); 1722 1723 /** 1724 * Constant for the "Osmanya" Unicode character block. 1725 * @since 1.5 1726 */ 1727 public static final UnicodeBlock OSMANYA = 1728 new UnicodeBlock("OSMANYA"); 1729 1730 /** 1731 * Constant for the "Cypriot Syllabary" Unicode character block. 1732 * @since 1.5 1733 */ 1734 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1735 new UnicodeBlock("CYPRIOT_SYLLABARY", 1736 "CYPRIOT SYLLABARY", 1737 "CYPRIOTSYLLABARY"); 1738 1739 /** 1740 * Constant for the "Byzantine Musical Symbols" Unicode character block. 
1741 * @since 1.5 1742 */ 1743 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1744 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1745 "BYZANTINE MUSICAL SYMBOLS", 1746 "BYZANTINEMUSICALSYMBOLS"); 1747 1748 /** 1749 * Constant for the "Musical Symbols" Unicode character block. 1750 * @since 1.5 1751 */ 1752 public static final UnicodeBlock MUSICAL_SYMBOLS = 1753 new UnicodeBlock("MUSICAL_SYMBOLS", 1754 "MUSICAL SYMBOLS", 1755 "MUSICALSYMBOLS"); 1756 1757 /** 1758 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 1759 * @since 1.5 1760 */ 1761 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1762 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1763 "TAI XUAN JING SYMBOLS", 1764 "TAIXUANJINGSYMBOLS"); 1765 1766 /** 1767 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1768 * character block. 1769 * @since 1.5 1770 */ 1771 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1772 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1773 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1774 "MATHEMATICALALPHANUMERICSYMBOLS"); 1775 1776 /** 1777 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1778 * character block. 1779 * @since 1.5 1780 */ 1781 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1782 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1783 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1784 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1785 1786 /** 1787 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 1788 * @since 1.5 1789 */ 1790 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1791 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1792 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1793 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1794 1795 /** 1796 * Constant for the "Tags" Unicode character block. 
1797 * @since 1.5 1798 */ 1799 public static final UnicodeBlock TAGS = 1800 new UnicodeBlock("TAGS"); 1801 1802 /** 1803 * Constant for the "Variation Selectors Supplement" Unicode character 1804 * block. 1805 * @since 1.5 1806 */ 1807 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1808 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1809 "VARIATION SELECTORS SUPPLEMENT", 1810 "VARIATIONSELECTORSSUPPLEMENT"); 1811 1812 /** 1813 * Constant for the "Supplementary Private Use Area-A" Unicode character 1814 * block. 1815 * @since 1.5 1816 */ 1817 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1818 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1819 "SUPPLEMENTARY PRIVATE USE AREA-A", 1820 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1821 1822 /** 1823 * Constant for the "Supplementary Private Use Area-B" Unicode character 1824 * block. 1825 * @since 1.5 1826 */ 1827 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1828 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1829 "SUPPLEMENTARY PRIVATE USE AREA-B", 1830 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1831 1832 /** 1833 * Constant for the "High Surrogates" Unicode character block. 1834 * This block represents codepoint values in the high surrogate 1835 * range: U+D800 through U+DB7F 1836 * 1837 * @since 1.5 1838 */ 1839 public static final UnicodeBlock HIGH_SURROGATES = 1840 new UnicodeBlock("HIGH_SURROGATES", 1841 "HIGH SURROGATES", 1842 "HIGHSURROGATES"); 1843 1844 /** 1845 * Constant for the "High Private Use Surrogates" Unicode character 1846 * block. 
1847 * This block represents codepoint values in the private use high 1848 * surrogate range: U+DB80 through U+DBFF 1849 * 1850 * @since 1.5 1851 */ 1852 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1853 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1854 "HIGH PRIVATE USE SURROGATES", 1855 "HIGHPRIVATEUSESURROGATES"); 1856 1857 /** 1858 * Constant for the "Low Surrogates" Unicode character block. 1859 * This block represents codepoint values in the low surrogate 1860 * range: U+DC00 through U+DFFF 1861 * 1862 * @since 1.5 1863 */ 1864 public static final UnicodeBlock LOW_SURROGATES = 1865 new UnicodeBlock("LOW_SURROGATES", 1866 "LOW SURROGATES", 1867 "LOWSURROGATES"); 1868 1869 /** 1870 * Constant for the "Arabic Supplement" Unicode character block. 1871 * @since 1.7 1872 */ 1873 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1874 new UnicodeBlock("ARABIC_SUPPLEMENT", 1875 "ARABIC SUPPLEMENT", 1876 "ARABICSUPPLEMENT"); 1877 1878 /** 1879 * Constant for the "NKo" Unicode character block. 1880 * @since 1.7 1881 */ 1882 public static final UnicodeBlock NKO = 1883 new UnicodeBlock("NKO"); 1884 1885 /** 1886 * Constant for the "Samaritan" Unicode character block. 1887 * @since 1.7 1888 */ 1889 public static final UnicodeBlock SAMARITAN = 1890 new UnicodeBlock("SAMARITAN"); 1891 1892 /** 1893 * Constant for the "Mandaic" Unicode character block. 1894 * @since 1.7 1895 */ 1896 public static final UnicodeBlock MANDAIC = 1897 new UnicodeBlock("MANDAIC"); 1898 1899 /** 1900 * Constant for the "Ethiopic Supplement" Unicode character block. 1901 * @since 1.7 1902 */ 1903 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1904 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1905 "ETHIOPIC SUPPLEMENT", 1906 "ETHIOPICSUPPLEMENT"); 1907 1908 /** 1909 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1910 * Unicode character block. 
1911 * @since 1.7 1912 */ 1913 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1914 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1915 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1916 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1917 1918 /** 1919 * Constant for the "New Tai Lue" Unicode character block. 1920 * @since 1.7 1921 */ 1922 public static final UnicodeBlock NEW_TAI_LUE = 1923 new UnicodeBlock("NEW_TAI_LUE", 1924 "NEW TAI LUE", 1925 "NEWTAILUE"); 1926 1927 /** 1928 * Constant for the "Buginese" Unicode character block. 1929 * @since 1.7 1930 */ 1931 public static final UnicodeBlock BUGINESE = 1932 new UnicodeBlock("BUGINESE"); 1933 1934 /** 1935 * Constant for the "Tai Tham" Unicode character block. 1936 * @since 1.7 1937 */ 1938 public static final UnicodeBlock TAI_THAM = 1939 new UnicodeBlock("TAI_THAM", 1940 "TAI THAM", 1941 "TAITHAM"); 1942 1943 /** 1944 * Constant for the "Balinese" Unicode character block. 1945 * @since 1.7 1946 */ 1947 public static final UnicodeBlock BALINESE = 1948 new UnicodeBlock("BALINESE"); 1949 1950 /** 1951 * Constant for the "Sundanese" Unicode character block. 1952 * @since 1.7 1953 */ 1954 public static final UnicodeBlock SUNDANESE = 1955 new UnicodeBlock("SUNDANESE"); 1956 1957 /** 1958 * Constant for the "Batak" Unicode character block. 1959 * @since 1.7 1960 */ 1961 public static final UnicodeBlock BATAK = 1962 new UnicodeBlock("BATAK"); 1963 1964 /** 1965 * Constant for the "Lepcha" Unicode character block. 1966 * @since 1.7 1967 */ 1968 public static final UnicodeBlock LEPCHA = 1969 new UnicodeBlock("LEPCHA"); 1970 1971 /** 1972 * Constant for the "Ol Chiki" Unicode character block. 1973 * @since 1.7 1974 */ 1975 public static final UnicodeBlock OL_CHIKI = 1976 new UnicodeBlock("OL_CHIKI", 1977 "OL CHIKI", 1978 "OLCHIKI"); 1979 1980 /** 1981 * Constant for the "Vedic Extensions" Unicode character block. 
1982 * @since 1.7 1983 */ 1984 public static final UnicodeBlock VEDIC_EXTENSIONS = 1985 new UnicodeBlock("VEDIC_EXTENSIONS", 1986 "VEDIC EXTENSIONS", 1987 "VEDICEXTENSIONS"); 1988 1989 /** 1990 * Constant for the "Phonetic Extensions Supplement" Unicode character 1991 * block. 1992 * @since 1.7 1993 */ 1994 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1995 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1996 "PHONETIC EXTENSIONS SUPPLEMENT", 1997 "PHONETICEXTENSIONSSUPPLEMENT"); 1998 1999 /** 2000 * Constant for the "Combining Diacritical Marks Supplement" Unicode 2001 * character block. 2002 * @since 1.7 2003 */ 2004 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 2005 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 2006 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 2007 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 2008 2009 /** 2010 * Constant for the "Glagolitic" Unicode character block. 2011 * @since 1.7 2012 */ 2013 public static final UnicodeBlock GLAGOLITIC = 2014 new UnicodeBlock("GLAGOLITIC"); 2015 2016 /** 2017 * Constant for the "Latin Extended-C" Unicode character block. 2018 * @since 1.7 2019 */ 2020 public static final UnicodeBlock LATIN_EXTENDED_C = 2021 new UnicodeBlock("LATIN_EXTENDED_C", 2022 "LATIN EXTENDED-C", 2023 "LATINEXTENDED-C"); 2024 2025 /** 2026 * Constant for the "Coptic" Unicode character block. 2027 * @since 1.7 2028 */ 2029 public static final UnicodeBlock COPTIC = 2030 new UnicodeBlock("COPTIC"); 2031 2032 /** 2033 * Constant for the "Georgian Supplement" Unicode character block. 2034 * @since 1.7 2035 */ 2036 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 2037 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 2038 "GEORGIAN SUPPLEMENT", 2039 "GEORGIANSUPPLEMENT"); 2040 2041 /** 2042 * Constant for the "Tifinagh" Unicode character block. 
2043 * @since 1.7 2044 */ 2045 public static final UnicodeBlock TIFINAGH = 2046 new UnicodeBlock("TIFINAGH"); 2047 2048 /** 2049 * Constant for the "Ethiopic Extended" Unicode character block. 2050 * @since 1.7 2051 */ 2052 public static final UnicodeBlock ETHIOPIC_EXTENDED = 2053 new UnicodeBlock("ETHIOPIC_EXTENDED", 2054 "ETHIOPIC EXTENDED", 2055 "ETHIOPICEXTENDED"); 2056 2057 /** 2058 * Constant for the "Cyrillic Extended-A" Unicode character block. 2059 * @since 1.7 2060 */ 2061 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2062 new UnicodeBlock("CYRILLIC_EXTENDED_A", 2063 "CYRILLIC EXTENDED-A", 2064 "CYRILLICEXTENDED-A"); 2065 2066 /** 2067 * Constant for the "Supplemental Punctuation" Unicode character block. 2068 * @since 1.7 2069 */ 2070 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2071 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 2072 "SUPPLEMENTAL PUNCTUATION", 2073 "SUPPLEMENTALPUNCTUATION"); 2074 2075 /** 2076 * Constant for the "CJK Strokes" Unicode character block. 2077 * @since 1.7 2078 */ 2079 public static final UnicodeBlock CJK_STROKES = 2080 new UnicodeBlock("CJK_STROKES", 2081 "CJK STROKES", 2082 "CJKSTROKES"); 2083 2084 /** 2085 * Constant for the "Lisu" Unicode character block. 2086 * @since 1.7 2087 */ 2088 public static final UnicodeBlock LISU = 2089 new UnicodeBlock("LISU"); 2090 2091 /** 2092 * Constant for the "Vai" Unicode character block. 2093 * @since 1.7 2094 */ 2095 public static final UnicodeBlock VAI = 2096 new UnicodeBlock("VAI"); 2097 2098 /** 2099 * Constant for the "Cyrillic Extended-B" Unicode character block. 2100 * @since 1.7 2101 */ 2102 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2103 new UnicodeBlock("CYRILLIC_EXTENDED_B", 2104 "CYRILLIC EXTENDED-B", 2105 "CYRILLICEXTENDED-B"); 2106 2107 /** 2108 * Constant for the "Bamum" Unicode character block. 
2109 * @since 1.7 2110 */ 2111 public static final UnicodeBlock BAMUM = 2112 new UnicodeBlock("BAMUM"); 2113 2114 /** 2115 * Constant for the "Modifier Tone Letters" Unicode character block. 2116 * @since 1.7 2117 */ 2118 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2119 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2120 "MODIFIER TONE LETTERS", 2121 "MODIFIERTONELETTERS"); 2122 2123 /** 2124 * Constant for the "Latin Extended-D" Unicode character block. 2125 * @since 1.7 2126 */ 2127 public static final UnicodeBlock LATIN_EXTENDED_D = 2128 new UnicodeBlock("LATIN_EXTENDED_D", 2129 "LATIN EXTENDED-D", 2130 "LATINEXTENDED-D"); 2131 2132 /** 2133 * Constant for the "Syloti Nagri" Unicode character block. 2134 * @since 1.7 2135 */ 2136 public static final UnicodeBlock SYLOTI_NAGRI = 2137 new UnicodeBlock("SYLOTI_NAGRI", 2138 "SYLOTI NAGRI", 2139 "SYLOTINAGRI"); 2140 2141 /** 2142 * Constant for the "Common Indic Number Forms" Unicode character block. 2143 * @since 1.7 2144 */ 2145 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2146 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2147 "COMMON INDIC NUMBER FORMS", 2148 "COMMONINDICNUMBERFORMS"); 2149 2150 /** 2151 * Constant for the "Phags-pa" Unicode character block. 2152 * @since 1.7 2153 */ 2154 public static final UnicodeBlock PHAGS_PA = 2155 new UnicodeBlock("PHAGS_PA", 2156 "PHAGS-PA"); 2157 2158 /** 2159 * Constant for the "Saurashtra" Unicode character block. 2160 * @since 1.7 2161 */ 2162 public static final UnicodeBlock SAURASHTRA = 2163 new UnicodeBlock("SAURASHTRA"); 2164 2165 /** 2166 * Constant for the "Devanagari Extended" Unicode character block. 2167 * @since 1.7 2168 */ 2169 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2170 new UnicodeBlock("DEVANAGARI_EXTENDED", 2171 "DEVANAGARI EXTENDED", 2172 "DEVANAGARIEXTENDED"); 2173 2174 /** 2175 * Constant for the "Kayah Li" Unicode character block. 
2176 * @since 1.7 2177 */ 2178 public static final UnicodeBlock KAYAH_LI = 2179 new UnicodeBlock("KAYAH_LI", 2180 "KAYAH LI", 2181 "KAYAHLI"); 2182 2183 /** 2184 * Constant for the "Rejang" Unicode character block. 2185 * @since 1.7 2186 */ 2187 public static final UnicodeBlock REJANG = 2188 new UnicodeBlock("REJANG"); 2189 2190 /** 2191 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2192 * @since 1.7 2193 */ 2194 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2195 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2196 "HANGUL JAMO EXTENDED-A", 2197 "HANGULJAMOEXTENDED-A"); 2198 2199 /** 2200 * Constant for the "Javanese" Unicode character block. 2201 * @since 1.7 2202 */ 2203 public static final UnicodeBlock JAVANESE = 2204 new UnicodeBlock("JAVANESE"); 2205 2206 /** 2207 * Constant for the "Cham" Unicode character block. 2208 * @since 1.7 2209 */ 2210 public static final UnicodeBlock CHAM = 2211 new UnicodeBlock("CHAM"); 2212 2213 /** 2214 * Constant for the "Myanmar Extended-A" Unicode character block. 2215 * @since 1.7 2216 */ 2217 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2218 new UnicodeBlock("MYANMAR_EXTENDED_A", 2219 "MYANMAR EXTENDED-A", 2220 "MYANMAREXTENDED-A"); 2221 2222 /** 2223 * Constant for the "Tai Viet" Unicode character block. 2224 * @since 1.7 2225 */ 2226 public static final UnicodeBlock TAI_VIET = 2227 new UnicodeBlock("TAI_VIET", 2228 "TAI VIET", 2229 "TAIVIET"); 2230 2231 /** 2232 * Constant for the "Ethiopic Extended-A" Unicode character block. 2233 * @since 1.7 2234 */ 2235 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2236 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2237 "ETHIOPIC EXTENDED-A", 2238 "ETHIOPICEXTENDED-A"); 2239 2240 /** 2241 * Constant for the "Meetei Mayek" Unicode character block. 
2242 * @since 1.7 2243 */ 2244 public static final UnicodeBlock MEETEI_MAYEK = 2245 new UnicodeBlock("MEETEI_MAYEK", 2246 "MEETEI MAYEK", 2247 "MEETEIMAYEK"); 2248 2249 /** 2250 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2251 * @since 1.7 2252 */ 2253 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2254 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2255 "HANGUL JAMO EXTENDED-B", 2256 "HANGULJAMOEXTENDED-B"); 2257 2258 /** 2259 * Constant for the "Vertical Forms" Unicode character block. 2260 * @since 1.7 2261 */ 2262 public static final UnicodeBlock VERTICAL_FORMS = 2263 new UnicodeBlock("VERTICAL_FORMS", 2264 "VERTICAL FORMS", 2265 "VERTICALFORMS"); 2266 2267 /** 2268 * Constant for the "Ancient Greek Numbers" Unicode character block. 2269 * @since 1.7 2270 */ 2271 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2272 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2273 "ANCIENT GREEK NUMBERS", 2274 "ANCIENTGREEKNUMBERS"); 2275 2276 /** 2277 * Constant for the "Ancient Symbols" Unicode character block. 2278 * @since 1.7 2279 */ 2280 public static final UnicodeBlock ANCIENT_SYMBOLS = 2281 new UnicodeBlock("ANCIENT_SYMBOLS", 2282 "ANCIENT SYMBOLS", 2283 "ANCIENTSYMBOLS"); 2284 2285 /** 2286 * Constant for the "Phaistos Disc" Unicode character block. 2287 * @since 1.7 2288 */ 2289 public static final UnicodeBlock PHAISTOS_DISC = 2290 new UnicodeBlock("PHAISTOS_DISC", 2291 "PHAISTOS DISC", 2292 "PHAISTOSDISC"); 2293 2294 /** 2295 * Constant for the "Lycian" Unicode character block. 2296 * @since 1.7 2297 */ 2298 public static final UnicodeBlock LYCIAN = 2299 new UnicodeBlock("LYCIAN"); 2300 2301 /** 2302 * Constant for the "Carian" Unicode character block. 2303 * @since 1.7 2304 */ 2305 public static final UnicodeBlock CARIAN = 2306 new UnicodeBlock("CARIAN"); 2307 2308 /** 2309 * Constant for the "Old Persian" Unicode character block. 
2310 * @since 1.7 2311 */ 2312 public static final UnicodeBlock OLD_PERSIAN = 2313 new UnicodeBlock("OLD_PERSIAN", 2314 "OLD PERSIAN", 2315 "OLDPERSIAN"); 2316 2317 /** 2318 * Constant for the "Imperial Aramaic" Unicode character block. 2319 * @since 1.7 2320 */ 2321 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2322 new UnicodeBlock("IMPERIAL_ARAMAIC", 2323 "IMPERIAL ARAMAIC", 2324 "IMPERIALARAMAIC"); 2325 2326 /** 2327 * Constant for the "Phoenician" Unicode character block. 2328 * @since 1.7 2329 */ 2330 public static final UnicodeBlock PHOENICIAN = 2331 new UnicodeBlock("PHOENICIAN"); 2332 2333 /** 2334 * Constant for the "Lydian" Unicode character block. 2335 * @since 1.7 2336 */ 2337 public static final UnicodeBlock LYDIAN = 2338 new UnicodeBlock("LYDIAN"); 2339 2340 /** 2341 * Constant for the "Kharoshthi" Unicode character block. 2342 * @since 1.7 2343 */ 2344 public static final UnicodeBlock KHAROSHTHI = 2345 new UnicodeBlock("KHAROSHTHI"); 2346 2347 /** 2348 * Constant for the "Old South Arabian" Unicode character block. 2349 * @since 1.7 2350 */ 2351 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2352 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2353 "OLD SOUTH ARABIAN", 2354 "OLDSOUTHARABIAN"); 2355 2356 /** 2357 * Constant for the "Avestan" Unicode character block. 2358 * @since 1.7 2359 */ 2360 public static final UnicodeBlock AVESTAN = 2361 new UnicodeBlock("AVESTAN"); 2362 2363 /** 2364 * Constant for the "Inscriptional Parthian" Unicode character block. 2365 * @since 1.7 2366 */ 2367 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2368 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2369 "INSCRIPTIONAL PARTHIAN", 2370 "INSCRIPTIONALPARTHIAN"); 2371 2372 /** 2373 * Constant for the "Inscriptional Pahlavi" Unicode character block. 
2374 * @since 1.7 2375 */ 2376 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2377 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2378 "INSCRIPTIONAL PAHLAVI", 2379 "INSCRIPTIONALPAHLAVI"); 2380 2381 /** 2382 * Constant for the "Old Turkic" Unicode character block. 2383 * @since 1.7 2384 */ 2385 public static final UnicodeBlock OLD_TURKIC = 2386 new UnicodeBlock("OLD_TURKIC", 2387 "OLD TURKIC", 2388 "OLDTURKIC"); 2389 2390 /** 2391 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2392 * @since 1.7 2393 */ 2394 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2395 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2396 "RUMI NUMERAL SYMBOLS", 2397 "RUMINUMERALSYMBOLS"); 2398 2399 /** 2400 * Constant for the "Brahmi" Unicode character block. 2401 * @since 1.7 2402 */ 2403 public static final UnicodeBlock BRAHMI = 2404 new UnicodeBlock("BRAHMI"); 2405 2406 /** 2407 * Constant for the "Kaithi" Unicode character block. 2408 * @since 1.7 2409 */ 2410 public static final UnicodeBlock KAITHI = 2411 new UnicodeBlock("KAITHI"); 2412 2413 /** 2414 * Constant for the "Cuneiform" Unicode character block. 2415 * @since 1.7 2416 */ 2417 public static final UnicodeBlock CUNEIFORM = 2418 new UnicodeBlock("CUNEIFORM"); 2419 2420 /** 2421 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2422 * character block. 2423 * @since 1.7 2424 */ 2425 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2426 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2427 "CUNEIFORM NUMBERS AND PUNCTUATION", 2428 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2429 2430 /** 2431 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2432 * @since 1.7 2433 */ 2434 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2435 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2436 "EGYPTIAN HIEROGLYPHS", 2437 "EGYPTIANHIEROGLYPHS"); 2438 2439 /** 2440 * Constant for the "Bamum Supplement" Unicode character block. 
2441 * @since 1.7 2442 */ 2443 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2444 new UnicodeBlock("BAMUM_SUPPLEMENT", 2445 "BAMUM SUPPLEMENT", 2446 "BAMUMSUPPLEMENT"); 2447 2448 /** 2449 * Constant for the "Kana Supplement" Unicode character block. 2450 * @since 1.7 2451 */ 2452 public static final UnicodeBlock KANA_SUPPLEMENT = 2453 new UnicodeBlock("KANA_SUPPLEMENT", 2454 "KANA SUPPLEMENT", 2455 "KANASUPPLEMENT"); 2456 2457 /** 2458 * Constant for the "Ancient Greek Musical Notation" Unicode character 2459 * block. 2460 * @since 1.7 2461 */ 2462 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2463 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2464 "ANCIENT GREEK MUSICAL NOTATION", 2465 "ANCIENTGREEKMUSICALNOTATION"); 2466 2467 /** 2468 * Constant for the "Counting Rod Numerals" Unicode character block. 2469 * @since 1.7 2470 */ 2471 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2472 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2473 "COUNTING ROD NUMERALS", 2474 "COUNTINGRODNUMERALS"); 2475 2476 /** 2477 * Constant for the "Mahjong Tiles" Unicode character block. 2478 * @since 1.7 2479 */ 2480 public static final UnicodeBlock MAHJONG_TILES = 2481 new UnicodeBlock("MAHJONG_TILES", 2482 "MAHJONG TILES", 2483 "MAHJONGTILES"); 2484 2485 /** 2486 * Constant for the "Domino Tiles" Unicode character block. 2487 * @since 1.7 2488 */ 2489 public static final UnicodeBlock DOMINO_TILES = 2490 new UnicodeBlock("DOMINO_TILES", 2491 "DOMINO TILES", 2492 "DOMINOTILES"); 2493 2494 /** 2495 * Constant for the "Playing Cards" Unicode character block. 2496 * @since 1.7 2497 */ 2498 public static final UnicodeBlock PLAYING_CARDS = 2499 new UnicodeBlock("PLAYING_CARDS", 2500 "PLAYING CARDS", 2501 "PLAYINGCARDS"); 2502 2503 /** 2504 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2505 * block. 
2506 * @since 1.7 2507 */ 2508 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2509 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2510 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2511 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2512 2513 /** 2514 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2515 * block. 2516 * @since 1.7 2517 */ 2518 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2519 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2520 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2521 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2522 2523 /** 2524 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode 2525 * character block. 2526 * @since 1.7 2527 */ 2528 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2529 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2530 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2531 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2532 2533 /** 2534 * Constant for the "Emoticons" Unicode character block. 2535 * @since 1.7 2536 */ 2537 public static final UnicodeBlock EMOTICONS = 2538 new UnicodeBlock("EMOTICONS"); 2539 2540 /** 2541 * Constant for the "Transport And Map Symbols" Unicode character block. 2542 * @since 1.7 2543 */ 2544 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2545 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2546 "TRANSPORT AND MAP SYMBOLS", 2547 "TRANSPORTANDMAPSYMBOLS"); 2548 2549 /** 2550 * Constant for the "Alchemical Symbols" Unicode character block. 2551 * @since 1.7 2552 */ 2553 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2554 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2555 "ALCHEMICAL SYMBOLS", 2556 "ALCHEMICALSYMBOLS"); 2557 2558 /** 2559 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2560 * character block. 
2561 * @since 1.7 2562 */ 2563 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2564 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2565 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2566 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2567 2568 /** 2569 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2570 * character block. 2571 * @since 1.7 2572 */ 2573 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2574 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2575 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2576 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2577 2578 /** 2579 * Constant for the "Arabic Extended-A" Unicode character block. 2580 * @since 1.8 2581 */ 2582 public static final UnicodeBlock ARABIC_EXTENDED_A = 2583 new UnicodeBlock("ARABIC_EXTENDED_A", 2584 "ARABIC EXTENDED-A", 2585 "ARABICEXTENDED-A"); 2586 2587 /** 2588 * Constant for the "Sundanese Supplement" Unicode character block. 2589 * @since 1.8 2590 */ 2591 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2592 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2593 "SUNDANESE SUPPLEMENT", 2594 "SUNDANESESUPPLEMENT"); 2595 2596 /** 2597 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2598 * @since 1.8 2599 */ 2600 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2601 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2602 "MEETEI MAYEK EXTENSIONS", 2603 "MEETEIMAYEKEXTENSIONS"); 2604 2605 /** 2606 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2607 * @since 1.8 2608 */ 2609 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2610 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2611 "MEROITIC HIEROGLYPHS", 2612 "MEROITICHIEROGLYPHS"); 2613 2614 /** 2615 * Constant for the "Meroitic Cursive" Unicode character block. 
2616 * @since 1.8 2617 */ 2618 public static final UnicodeBlock MEROITIC_CURSIVE = 2619 new UnicodeBlock("MEROITIC_CURSIVE", 2620 "MEROITIC CURSIVE", 2621 "MEROITICCURSIVE"); 2622 2623 /** 2624 * Constant for the "Sora Sompeng" Unicode character block. 2625 * @since 1.8 2626 */ 2627 public static final UnicodeBlock SORA_SOMPENG = 2628 new UnicodeBlock("SORA_SOMPENG", 2629 "SORA SOMPENG", 2630 "SORASOMPENG"); 2631 2632 /** 2633 * Constant for the "Chakma" Unicode character block. 2634 * @since 1.8 2635 */ 2636 public static final UnicodeBlock CHAKMA = 2637 new UnicodeBlock("CHAKMA"); 2638 2639 /** 2640 * Constant for the "Sharada" Unicode character block. 2641 * @since 1.8 2642 */ 2643 public static final UnicodeBlock SHARADA = 2644 new UnicodeBlock("SHARADA"); 2645 2646 /** 2647 * Constant for the "Takri" Unicode character block. 2648 * @since 1.8 2649 */ 2650 public static final UnicodeBlock TAKRI = 2651 new UnicodeBlock("TAKRI"); 2652 2653 /** 2654 * Constant for the "Miao" Unicode character block. 2655 * @since 1.8 2656 */ 2657 public static final UnicodeBlock MIAO = 2658 new UnicodeBlock("MIAO"); 2659 2660 /** 2661 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2662 * character block. 2663 * @since 1.8 2664 */ 2665 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2666 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2667 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2668 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2669 2670 /** 2671 * Constant for the "Combining Diacritical Marks Extended" Unicode 2672 * character block. 2673 * @since 9 2674 */ 2675 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2676 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", 2677 "COMBINING DIACRITICAL MARKS EXTENDED", 2678 "COMBININGDIACRITICALMARKSEXTENDED"); 2679 2680 /** 2681 * Constant for the "Myanmar Extended-B" Unicode character block. 
2682 * @since 9 2683 */ 2684 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2685 new UnicodeBlock("MYANMAR_EXTENDED_B", 2686 "MYANMAR EXTENDED-B", 2687 "MYANMAREXTENDED-B"); 2688 2689 /** 2690 * Constant for the "Latin Extended-E" Unicode character block. 2691 * @since 9 2692 */ 2693 public static final UnicodeBlock LATIN_EXTENDED_E = 2694 new UnicodeBlock("LATIN_EXTENDED_E", 2695 "LATIN EXTENDED-E", 2696 "LATINEXTENDED-E"); 2697 2698 /** 2699 * Constant for the "Coptic Epact Numbers" Unicode character block. 2700 * @since 9 2701 */ 2702 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2703 new UnicodeBlock("COPTIC_EPACT_NUMBERS", 2704 "COPTIC EPACT NUMBERS", 2705 "COPTICEPACTNUMBERS"); 2706 2707 /** 2708 * Constant for the "Old Permic" Unicode character block. 2709 * @since 9 2710 */ 2711 public static final UnicodeBlock OLD_PERMIC = 2712 new UnicodeBlock("OLD_PERMIC", 2713 "OLD PERMIC", 2714 "OLDPERMIC"); 2715 2716 /** 2717 * Constant for the "Elbasan" Unicode character block. 2718 * @since 9 2719 */ 2720 public static final UnicodeBlock ELBASAN = 2721 new UnicodeBlock("ELBASAN"); 2722 2723 /** 2724 * Constant for the "Caucasian Albanian" Unicode character block. 2725 * @since 9 2726 */ 2727 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2728 new UnicodeBlock("CAUCASIAN_ALBANIAN", 2729 "CAUCASIAN ALBANIAN", 2730 "CAUCASIANALBANIAN"); 2731 2732 /** 2733 * Constant for the "Linear A" Unicode character block. 2734 * @since 9 2735 */ 2736 public static final UnicodeBlock LINEAR_A = 2737 new UnicodeBlock("LINEAR_A", 2738 "LINEAR A", 2739 "LINEARA"); 2740 2741 /** 2742 * Constant for the "Palmyrene" Unicode character block. 2743 * @since 9 2744 */ 2745 public static final UnicodeBlock PALMYRENE = 2746 new UnicodeBlock("PALMYRENE"); 2747 2748 /** 2749 * Constant for the "Nabataean" Unicode character block. 
2750 * @since 9 2751 */ 2752 public static final UnicodeBlock NABATAEAN = 2753 new UnicodeBlock("NABATAEAN"); 2754 2755 /** 2756 * Constant for the "Old North Arabian" Unicode character block. 2757 * @since 9 2758 */ 2759 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2760 new UnicodeBlock("OLD_NORTH_ARABIAN", 2761 "OLD NORTH ARABIAN", 2762 "OLDNORTHARABIAN"); 2763 2764 /** 2765 * Constant for the "Manichaean" Unicode character block. 2766 * @since 9 2767 */ 2768 public static final UnicodeBlock MANICHAEAN = 2769 new UnicodeBlock("MANICHAEAN"); 2770 2771 /** 2772 * Constant for the "Psalter Pahlavi" Unicode character block. 2773 * @since 9 2774 */ 2775 public static final UnicodeBlock PSALTER_PAHLAVI = 2776 new UnicodeBlock("PSALTER_PAHLAVI", 2777 "PSALTER PAHLAVI", 2778 "PSALTERPAHLAVI"); 2779 2780 /** 2781 * Constant for the "Mahajani" Unicode character block. 2782 * @since 9 2783 */ 2784 public static final UnicodeBlock MAHAJANI = 2785 new UnicodeBlock("MAHAJANI"); 2786 2787 /** 2788 * Constant for the "Sinhala Archaic Numbers" Unicode character block. 2789 * @since 9 2790 */ 2791 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2792 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", 2793 "SINHALA ARCHAIC NUMBERS", 2794 "SINHALAARCHAICNUMBERS"); 2795 2796 /** 2797 * Constant for the "Khojki" Unicode character block. 2798 * @since 9 2799 */ 2800 public static final UnicodeBlock KHOJKI = 2801 new UnicodeBlock("KHOJKI"); 2802 2803 /** 2804 * Constant for the "Khudawadi" Unicode character block. 2805 * @since 9 2806 */ 2807 public static final UnicodeBlock KHUDAWADI = 2808 new UnicodeBlock("KHUDAWADI"); 2809 2810 /** 2811 * Constant for the "Grantha" Unicode character block. 2812 * @since 9 2813 */ 2814 public static final UnicodeBlock GRANTHA = 2815 new UnicodeBlock("GRANTHA"); 2816 2817 /** 2818 * Constant for the "Tirhuta" Unicode character block. 
2819 * @since 9 2820 */ 2821 public static final UnicodeBlock TIRHUTA = 2822 new UnicodeBlock("TIRHUTA"); 2823 2824 /** 2825 * Constant for the "Siddham" Unicode character block. 2826 * @since 9 2827 */ 2828 public static final UnicodeBlock SIDDHAM = 2829 new UnicodeBlock("SIDDHAM"); 2830 2831 /** 2832 * Constant for the "Modi" Unicode character block. 2833 * @since 9 2834 */ 2835 public static final UnicodeBlock MODI = 2836 new UnicodeBlock("MODI"); 2837 2838 /** 2839 * Constant for the "Warang Citi" Unicode character block. 2840 * @since 9 2841 */ 2842 public static final UnicodeBlock WARANG_CITI = 2843 new UnicodeBlock("WARANG_CITI", 2844 "WARANG CITI", 2845 "WARANGCITI"); 2846 2847 /** 2848 * Constant for the "Pau Cin Hau" Unicode character block. 2849 * @since 9 2850 */ 2851 public static final UnicodeBlock PAU_CIN_HAU = 2852 new UnicodeBlock("PAU_CIN_HAU", 2853 "PAU CIN HAU", 2854 "PAUCINHAU"); 2855 2856 /** 2857 * Constant for the "Mro" Unicode character block. 2858 * @since 9 2859 */ 2860 public static final UnicodeBlock MRO = 2861 new UnicodeBlock("MRO"); 2862 2863 /** 2864 * Constant for the "Bassa Vah" Unicode character block. 2865 * @since 9 2866 */ 2867 public static final UnicodeBlock BASSA_VAH = 2868 new UnicodeBlock("BASSA_VAH", 2869 "BASSA VAH", 2870 "BASSAVAH"); 2871 2872 /** 2873 * Constant for the "Pahawh Hmong" Unicode character block. 2874 * @since 9 2875 */ 2876 public static final UnicodeBlock PAHAWH_HMONG = 2877 new UnicodeBlock("PAHAWH_HMONG", 2878 "PAHAWH HMONG", 2879 "PAHAWHHMONG"); 2880 2881 /** 2882 * Constant for the "Duployan" Unicode character block. 2883 * @since 9 2884 */ 2885 public static final UnicodeBlock DUPLOYAN = 2886 new UnicodeBlock("DUPLOYAN"); 2887 2888 /** 2889 * Constant for the "Shorthand Format Controls" Unicode character block. 
2890 * @since 9 2891 */ 2892 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2893 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", 2894 "SHORTHAND FORMAT CONTROLS", 2895 "SHORTHANDFORMATCONTROLS"); 2896 2897 /** 2898 * Constant for the "Mende Kikakui" Unicode character block. 2899 * @since 9 2900 */ 2901 public static final UnicodeBlock MENDE_KIKAKUI = 2902 new UnicodeBlock("MENDE_KIKAKUI", 2903 "MENDE KIKAKUI", 2904 "MENDEKIKAKUI"); 2905 2906 /** 2907 * Constant for the "Ornamental Dingbats" Unicode character block. 2908 * @since 9 2909 */ 2910 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2911 new UnicodeBlock("ORNAMENTAL_DINGBATS", 2912 "ORNAMENTAL DINGBATS", 2913 "ORNAMENTALDINGBATS"); 2914 2915 /** 2916 * Constant for the "Geometric Shapes Extended" Unicode character block. 2917 * @since 9 2918 */ 2919 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2920 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", 2921 "GEOMETRIC SHAPES EXTENDED", 2922 "GEOMETRICSHAPESEXTENDED"); 2923 2924 /** 2925 * Constant for the "Supplemental Arrows-C" Unicode character block. 2926 * @since 9 2927 */ 2928 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2929 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 2930 "SUPPLEMENTAL ARROWS-C", 2931 "SUPPLEMENTALARROWS-C"); 2932 2933 /** 2934 * Constant for the "Cherokee Supplement" Unicode character block. 2935 * @since 9 2936 */ 2937 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2938 new UnicodeBlock("CHEROKEE_SUPPLEMENT", 2939 "CHEROKEE SUPPLEMENT", 2940 "CHEROKEESUPPLEMENT"); 2941 2942 /** 2943 * Constant for the "Hatran" Unicode character block. 2944 * @since 9 2945 */ 2946 public static final UnicodeBlock HATRAN = 2947 new UnicodeBlock("HATRAN"); 2948 2949 /** 2950 * Constant for the "Old Hungarian" Unicode character block. 
2951 * @since 9 2952 */ 2953 public static final UnicodeBlock OLD_HUNGARIAN = 2954 new UnicodeBlock("OLD_HUNGARIAN", 2955 "OLD HUNGARIAN", 2956 "OLDHUNGARIAN"); 2957 2958 /** 2959 * Constant for the "Multani" Unicode character block. 2960 * @since 9 2961 */ 2962 public static final UnicodeBlock MULTANI = 2963 new UnicodeBlock("MULTANI"); 2964 2965 /** 2966 * Constant for the "Ahom" Unicode character block. 2967 * @since 9 2968 */ 2969 public static final UnicodeBlock AHOM = 2970 new UnicodeBlock("AHOM"); 2971 2972 /** 2973 * Constant for the "Early Dynastic Cuneiform" Unicode character block. 2974 * @since 9 2975 */ 2976 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2977 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", 2978 "EARLY DYNASTIC CUNEIFORM", 2979 "EARLYDYNASTICCUNEIFORM"); 2980 2981 /** 2982 * Constant for the "Anatolian Hieroglyphs" Unicode character block. 2983 * @since 9 2984 */ 2985 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2986 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", 2987 "ANATOLIAN HIEROGLYPHS", 2988 "ANATOLIANHIEROGLYPHS"); 2989 2990 /** 2991 * Constant for the "Sutton SignWriting" Unicode character block. 2992 * @since 9 2993 */ 2994 public static final UnicodeBlock SUTTON_SIGNWRITING = 2995 new UnicodeBlock("SUTTON_SIGNWRITING", 2996 "SUTTON SIGNWRITING", 2997 "SUTTONSIGNWRITING"); 2998 2999 /** 3000 * Constant for the "Supplemental Symbols and Pictographs" Unicode 3001 * character block. 3002 * @since 9 3003 */ 3004 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 3005 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 3006 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS", 3007 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS"); 3008 3009 /** 3010 * Constant for the "CJK Unified Ideographs Extension E" Unicode 3011 * character block. 
3012 * @since 9 3013 */ 3014 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 3015 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 3016 "CJK UNIFIED IDEOGRAPHS EXTENSION E", 3017 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE"); 3018 3019 /** 3020 * Constant for the "Syriac Supplement" Unicode 3021 * character block. 3022 * @since 11 3023 */ 3024 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 3025 new UnicodeBlock("SYRIAC_SUPPLEMENT", 3026 "SYRIAC SUPPLEMENT", 3027 "SYRIACSUPPLEMENT"); 3028 3029 /** 3030 * Constant for the "Cyrillic Extended-C" Unicode 3031 * character block. 3032 * @since 11 3033 */ 3034 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 3035 new UnicodeBlock("CYRILLIC_EXTENDED_C", 3036 "CYRILLIC EXTENDED-C", 3037 "CYRILLICEXTENDED-C"); 3038 3039 /** 3040 * Constant for the "Osage" Unicode 3041 * character block. 3042 * @since 11 3043 */ 3044 public static final UnicodeBlock OSAGE = 3045 new UnicodeBlock("OSAGE"); 3046 3047 /** 3048 * Constant for the "Newa" Unicode 3049 * character block. 3050 * @since 11 3051 */ 3052 public static final UnicodeBlock NEWA = 3053 new UnicodeBlock("NEWA"); 3054 3055 /** 3056 * Constant for the "Mongolian Supplement" Unicode 3057 * character block. 3058 * @since 11 3059 */ 3060 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 3061 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", 3062 "MONGOLIAN SUPPLEMENT", 3063 "MONGOLIANSUPPLEMENT"); 3064 3065 /** 3066 * Constant for the "Marchen" Unicode 3067 * character block. 3068 * @since 11 3069 */ 3070 public static final UnicodeBlock MARCHEN = 3071 new UnicodeBlock("MARCHEN"); 3072 3073 /** 3074 * Constant for the "Ideographic Symbols and Punctuation" Unicode 3075 * character block. 
3076 * @since 11 3077 */ 3078 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 3079 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", 3080 "IDEOGRAPHIC SYMBOLS AND PUNCTUATION", 3081 "IDEOGRAPHICSYMBOLSANDPUNCTUATION"); 3082 3083 /** 3084 * Constant for the "Tangut" Unicode 3085 * character block. 3086 * @since 11 3087 */ 3088 public static final UnicodeBlock TANGUT = 3089 new UnicodeBlock("TANGUT"); 3090 3091 /** 3092 * Constant for the "Tangut Components" Unicode 3093 * character block. 3094 * @since 11 3095 */ 3096 public static final UnicodeBlock TANGUT_COMPONENTS = 3097 new UnicodeBlock("TANGUT_COMPONENTS", 3098 "TANGUT COMPONENTS", 3099 "TANGUTCOMPONENTS"); 3100 3101 /** 3102 * Constant for the "Kana Extended-A" Unicode 3103 * character block. 3104 * @since 11 3105 */ 3106 public static final UnicodeBlock KANA_EXTENDED_A = 3107 new UnicodeBlock("KANA_EXTENDED_A", 3108 "KANA EXTENDED-A", 3109 "KANAEXTENDED-A"); 3110 /** 3111 * Constant for the "Glagolitic Supplement" Unicode 3112 * character block. 3113 * @since 11 3114 */ 3115 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 3116 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", 3117 "GLAGOLITIC SUPPLEMENT", 3118 "GLAGOLITICSUPPLEMENT"); 3119 /** 3120 * Constant for the "Adlam" Unicode 3121 * character block. 3122 * @since 11 3123 */ 3124 public static final UnicodeBlock ADLAM = 3125 new UnicodeBlock("ADLAM"); 3126 3127 /** 3128 * Constant for the "Masaram Gondi" Unicode 3129 * character block. 3130 * @since 11 3131 */ 3132 public static final UnicodeBlock MASARAM_GONDI = 3133 new UnicodeBlock("MASARAM_GONDI", 3134 "MASARAM GONDI", 3135 "MASARAMGONDI"); 3136 3137 /** 3138 * Constant for the "Zanabazar Square" Unicode 3139 * character block. 
3140 * @since 11 3141 */ 3142 public static final UnicodeBlock ZANABAZAR_SQUARE = 3143 new UnicodeBlock("ZANABAZAR_SQUARE", 3144 "ZANABAZAR SQUARE", 3145 "ZANABAZARSQUARE"); 3146 3147 /** 3148 * Constant for the "Nushu" Unicode 3149 * character block. 3150 * @since 11 3151 */ 3152 public static final UnicodeBlock NUSHU = 3153 new UnicodeBlock("NUSHU"); 3154 3155 /** 3156 * Constant for the "Soyombo" Unicode 3157 * character block. 3158 * @since 11 3159 */ 3160 public static final UnicodeBlock SOYOMBO = 3161 new UnicodeBlock("SOYOMBO"); 3162 3163 /** 3164 * Constant for the "Bhaiksuki" Unicode 3165 * character block. 3166 * @since 11 3167 */ 3168 public static final UnicodeBlock BHAIKSUKI = 3169 new UnicodeBlock("BHAIKSUKI"); 3170 3171 /** 3172 * Constant for the "CJK Unified Ideographs Extension F" Unicode 3173 * character block. 3174 * @since 11 3175 */ 3176 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 3177 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", 3178 "CJK UNIFIED IDEOGRAPHS EXTENSION F", 3179 "CJKUNIFIEDIDEOGRAPHSEXTENSIONF"); 3180 /** 3181 * Constant for the "Georgian Extended" Unicode 3182 * character block. 3183 * @since 12 3184 */ 3185 public static final UnicodeBlock GEORGIAN_EXTENDED = 3186 new UnicodeBlock("GEORGIAN_EXTENDED", 3187 "GEORGIAN EXTENDED", 3188 "GEORGIANEXTENDED"); 3189 3190 /** 3191 * Constant for the "Hanifi Rohingya" Unicode 3192 * character block. 3193 * @since 12 3194 */ 3195 public static final UnicodeBlock HANIFI_ROHINGYA = 3196 new UnicodeBlock("HANIFI_ROHINGYA", 3197 "HANIFI ROHINGYA", 3198 "HANIFIROHINGYA"); 3199 3200 /** 3201 * Constant for the "Old Sogdian" Unicode 3202 * character block. 3203 * @since 12 3204 */ 3205 public static final UnicodeBlock OLD_SOGDIAN = 3206 new UnicodeBlock("OLD_SOGDIAN", 3207 "OLD SOGDIAN", 3208 "OLDSOGDIAN"); 3209 3210 /** 3211 * Constant for the "Sogdian" Unicode 3212 * character block. 
3213 * @since 12 3214 */ 3215 public static final UnicodeBlock SOGDIAN = 3216 new UnicodeBlock("SOGDIAN"); 3217 3218 /** 3219 * Constant for the "Dogra" Unicode 3220 * character block. 3221 * @since 12 3222 */ 3223 public static final UnicodeBlock DOGRA = 3224 new UnicodeBlock("DOGRA"); 3225 3226 /** 3227 * Constant for the "Gunjala Gondi" Unicode 3228 * character block. 3229 * @since 12 3230 */ 3231 public static final UnicodeBlock GUNJALA_GONDI = 3232 new UnicodeBlock("GUNJALA_GONDI", 3233 "GUNJALA GONDI", 3234 "GUNJALAGONDI"); 3235 3236 /** 3237 * Constant for the "Makasar" Unicode 3238 * character block. 3239 * @since 12 3240 */ 3241 public static final UnicodeBlock MAKASAR = 3242 new UnicodeBlock("MAKASAR"); 3243 3244 /** 3245 * Constant for the "Medefaidrin" Unicode 3246 * character block. 3247 * @since 12 3248 */ 3249 public static final UnicodeBlock MEDEFAIDRIN = 3250 new UnicodeBlock("MEDEFAIDRIN"); 3251 3252 /** 3253 * Constant for the "Mayan Numerals" Unicode 3254 * character block. 3255 * @since 12 3256 */ 3257 public static final UnicodeBlock MAYAN_NUMERALS = 3258 new UnicodeBlock("MAYAN_NUMERALS", 3259 "MAYAN NUMERALS", 3260 "MAYANNUMERALS"); 3261 3262 /** 3263 * Constant for the "Indic Siyaq Numbers" Unicode 3264 * character block. 3265 * @since 12 3266 */ 3267 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS = 3268 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", 3269 "INDIC SIYAQ NUMBERS", 3270 "INDICSIYAQNUMBERS"); 3271 3272 /** 3273 * Constant for the "Chess Symbols" Unicode 3274 * character block. 3275 * @since 12 3276 */ 3277 public static final UnicodeBlock CHESS_SYMBOLS = 3278 new UnicodeBlock("CHESS_SYMBOLS", 3279 "CHESS SYMBOLS", 3280 "CHESSSYMBOLS"); 3281 3282 /** 3283 * Constant for the "Elymaic" Unicode 3284 * character block. 3285 * @since 13 3286 */ 3287 public static final UnicodeBlock ELYMAIC = 3288 new UnicodeBlock("ELYMAIC"); 3289 3290 /** 3291 * Constant for the "Nandinagari" Unicode 3292 * character block. 
3293 * @since 13 3294 */ 3295 public static final UnicodeBlock NANDINAGARI = 3296 new UnicodeBlock("NANDINAGARI"); 3297 3298 /** 3299 * Constant for the "Tamil Supplement" Unicode 3300 * character block. 3301 * @since 13 3302 */ 3303 public static final UnicodeBlock TAMIL_SUPPLEMENT = 3304 new UnicodeBlock("TAMIL_SUPPLEMENT", 3305 "TAMIL SUPPLEMENT", 3306 "TAMILSUPPLEMENT"); 3307 3308 /** 3309 * Constant for the "Egyptian Hieroglyph Format Controls" Unicode 3310 * character block. 3311 * @since 13 3312 */ 3313 public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 3314 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", 3315 "EGYPTIAN HIEROGLYPH FORMAT CONTROLS", 3316 "EGYPTIANHIEROGLYPHFORMATCONTROLS"); 3317 3318 /** 3319 * Constant for the "Small Kana Extension" Unicode 3320 * character block. 3321 * @since 13 3322 */ 3323 public static final UnicodeBlock SMALL_KANA_EXTENSION = 3324 new UnicodeBlock("SMALL_KANA_EXTENSION", 3325 "SMALL KANA EXTENSION", 3326 "SMALLKANAEXTENSION"); 3327 3328 /** 3329 * Constant for the "Nyiakeng Puachue Hmong" Unicode 3330 * character block. 3331 * @since 13 3332 */ 3333 public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG = 3334 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", 3335 "NYIAKENG PUACHUE HMONG", 3336 "NYIAKENGPUACHUEHMONG"); 3337 3338 /** 3339 * Constant for the "Wancho" Unicode 3340 * character block. 3341 * @since 13 3342 */ 3343 public static final UnicodeBlock WANCHO = 3344 new UnicodeBlock("WANCHO"); 3345 3346 /** 3347 * Constant for the "Ottoman Siyaq Numbers" Unicode 3348 * character block. 3349 * @since 13 3350 */ 3351 public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS = 3352 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", 3353 "OTTOMAN SIYAQ NUMBERS", 3354 "OTTOMANSIYAQNUMBERS"); 3355 3356 /** 3357 * Constant for the "Symbols and Pictographs Extended-A" Unicode 3358 * character block. 
3359 * @since 13 3360 */ 3361 public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 3362 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", 3363 "SYMBOLS AND PICTOGRAPHS EXTENDED-A", 3364 "SYMBOLSANDPICTOGRAPHSEXTENDED-A"); 3365 3366 /** 3367 * Constant for the "Yezidi" Unicode 3368 * character block. 3369 * @since 15 3370 */ 3371 public static final UnicodeBlock YEZIDI = 3372 new UnicodeBlock("YEZIDI"); 3373 3374 /** 3375 * Constant for the "Chorasmian" Unicode 3376 * character block. 3377 * @since 15 3378 */ 3379 public static final UnicodeBlock CHORASMIAN = 3380 new UnicodeBlock("CHORASMIAN"); 3381 3382 /** 3383 * Constant for the "Dives Akuru" Unicode 3384 * character block. 3385 * @since 15 3386 */ 3387 public static final UnicodeBlock DIVES_AKURU = 3388 new UnicodeBlock("DIVES_AKURU", 3389 "DIVES AKURU", 3390 "DIVESAKURU"); 3391 3392 /** 3393 * Constant for the "Lisu Supplement" Unicode 3394 * character block. 3395 * @since 15 3396 */ 3397 public static final UnicodeBlock LISU_SUPPLEMENT = 3398 new UnicodeBlock("LISU_SUPPLEMENT", 3399 "LISU SUPPLEMENT", 3400 "LISUSUPPLEMENT"); 3401 3402 /** 3403 * Constant for the "Khitan Small Script" Unicode 3404 * character block. 3405 * @since 15 3406 */ 3407 public static final UnicodeBlock KHITAN_SMALL_SCRIPT = 3408 new UnicodeBlock("KHITAN_SMALL_SCRIPT", 3409 "KHITAN SMALL SCRIPT", 3410 "KHITANSMALLSCRIPT"); 3411 3412 /** 3413 * Constant for the "Tangut Supplement" Unicode 3414 * character block. 3415 * @since 15 3416 */ 3417 public static final UnicodeBlock TANGUT_SUPPLEMENT = 3418 new UnicodeBlock("TANGUT_SUPPLEMENT", 3419 "TANGUT SUPPLEMENT", 3420 "TANGUTSUPPLEMENT"); 3421 3422 /** 3423 * Constant for the "Symbols for Legacy Computing" Unicode 3424 * character block. 
3425 * @since 15 3426 */ 3427 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING = 3428 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", 3429 "SYMBOLS FOR LEGACY COMPUTING", 3430 "SYMBOLSFORLEGACYCOMPUTING"); 3431 3432 /** 3433 * Constant for the "CJK Unified Ideographs Extension G" Unicode 3434 * character block. 3435 * @since 15 3436 */ 3437 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 3438 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G", 3439 "CJK UNIFIED IDEOGRAPHS EXTENSION G", 3440 "CJKUNIFIEDIDEOGRAPHSEXTENSIONG"); 3441 3442 /** 3443 * Constant for the "Arabic Extended-B" Unicode 3444 * character block. 3445 * @since 19 3446 */ 3447 public static final UnicodeBlock ARABIC_EXTENDED_B = 3448 new UnicodeBlock("ARABIC_EXTENDED_B", 3449 "ARABIC EXTENDED-B", 3450 "ARABICEXTENDED-B"); 3451 3452 /** 3453 * Constant for the "Vithkuqi" Unicode 3454 * character block. 3455 * @since 19 3456 */ 3457 public static final UnicodeBlock VITHKUQI = 3458 new UnicodeBlock("VITHKUQI"); 3459 3460 /** 3461 * Constant for the "Latin Extended-F" Unicode 3462 * character block. 3463 * @since 19 3464 */ 3465 public static final UnicodeBlock LATIN_EXTENDED_F = 3466 new UnicodeBlock("LATIN_EXTENDED_F", 3467 "LATIN EXTENDED-F", 3468 "LATINEXTENDED-F"); 3469 3470 /** 3471 * Constant for the "Old Uyghur" Unicode 3472 * character block. 3473 * @since 19 3474 */ 3475 public static final UnicodeBlock OLD_UYGHUR = 3476 new UnicodeBlock("OLD_UYGHUR", 3477 "OLD UYGHUR", 3478 "OLDUYGHUR"); 3479 3480 /** 3481 * Constant for the "Unified Canadian Aboriginal Syllabics Extended-A" Unicode 3482 * character block. 
3483 * @since 19 3484 */ 3485 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A = 3486 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A", 3487 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED-A", 3488 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED-A"); 3489 3490 /** 3491 * Constant for the "Cypro-Minoan" Unicode 3492 * character block. 3493 * @since 19 3494 */ 3495 public static final UnicodeBlock CYPRO_MINOAN = 3496 new UnicodeBlock("CYPRO_MINOAN", 3497 "CYPRO-MINOAN", 3498 "CYPRO-MINOAN"); 3499 3500 /** 3501 * Constant for the "Tangsa" Unicode 3502 * character block. 3503 * @since 19 3504 */ 3505 public static final UnicodeBlock TANGSA = 3506 new UnicodeBlock("TANGSA"); 3507 3508 /** 3509 * Constant for the "Kana Extended-B" Unicode 3510 * character block. 3511 * @since 19 3512 */ 3513 public static final UnicodeBlock KANA_EXTENDED_B = 3514 new UnicodeBlock("KANA_EXTENDED_B", 3515 "KANA EXTENDED-B", 3516 "KANAEXTENDED-B"); 3517 3518 /** 3519 * Constant for the "Znamenny Musical Notation" Unicode 3520 * character block. 3521 * @since 19 3522 */ 3523 public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION = 3524 new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION", 3525 "ZNAMENNY MUSICAL NOTATION", 3526 "ZNAMENNYMUSICALNOTATION"); 3527 3528 /** 3529 * Constant for the "Latin Extended-G" Unicode 3530 * character block. 3531 * @since 19 3532 */ 3533 public static final UnicodeBlock LATIN_EXTENDED_G = 3534 new UnicodeBlock("LATIN_EXTENDED_G", 3535 "LATIN EXTENDED-G", 3536 "LATINEXTENDED-G"); 3537 3538 /** 3539 * Constant for the "Toto" Unicode 3540 * character block. 3541 * @since 19 3542 */ 3543 public static final UnicodeBlock TOTO = 3544 new UnicodeBlock("TOTO"); 3545 3546 /** 3547 * Constant for the "Ethiopic Extended-B" Unicode 3548 * character block. 
3549 * @since 19 3550 */ 3551 public static final UnicodeBlock ETHIOPIC_EXTENDED_B = 3552 new UnicodeBlock("ETHIOPIC_EXTENDED_B", 3553 "ETHIOPIC EXTENDED-B", 3554 "ETHIOPICEXTENDED-B"); 3555 3556 /** 3557 * Constant for the "Arabic Extended-C" Unicode 3558 * character block. 3559 * @since 20 3560 */ 3561 public static final UnicodeBlock ARABIC_EXTENDED_C = 3562 new UnicodeBlock("ARABIC_EXTENDED_C", 3563 "ARABIC EXTENDED-C", 3564 "ARABICEXTENDED-C"); 3565 3566 /** 3567 * Constant for the "Devanagari Extended-A" Unicode 3568 * character block. 3569 * @since 20 3570 */ 3571 public static final UnicodeBlock DEVANAGARI_EXTENDED_A = 3572 new UnicodeBlock("DEVANAGARI_EXTENDED_A", 3573 "DEVANAGARI EXTENDED-A", 3574 "DEVANAGARIEXTENDED-A"); 3575 3576 /** 3577 * Constant for the "Kawi" Unicode 3578 * character block. 3579 * @since 20 3580 */ 3581 public static final UnicodeBlock KAWI = 3582 new UnicodeBlock("KAWI"); 3583 3584 /** 3585 * Constant for the "Kaktovik Numerals" Unicode 3586 * character block. 3587 * @since 20 3588 */ 3589 public static final UnicodeBlock KAKTOVIK_NUMERALS = 3590 new UnicodeBlock("KAKTOVIK_NUMERALS", 3591 "KAKTOVIK NUMERALS", 3592 "KAKTOVIKNUMERALS"); 3593 3594 /** 3595 * Constant for the "Cyrillic Extended-D" Unicode 3596 * character block. 3597 * @since 20 3598 */ 3599 public static final UnicodeBlock CYRILLIC_EXTENDED_D = 3600 new UnicodeBlock("CYRILLIC_EXTENDED_D", 3601 "CYRILLIC EXTENDED-D", 3602 "CYRILLICEXTENDED-D"); 3603 3604 /** 3605 * Constant for the "Nag Mundari" Unicode 3606 * character block. 3607 * @since 20 3608 */ 3609 public static final UnicodeBlock NAG_MUNDARI = 3610 new UnicodeBlock("NAG_MUNDARI", 3611 "NAG MUNDARI", 3612 "NAGMUNDARI"); 3613 3614 /** 3615 * Constant for the "CJK Unified Ideographs Extension H" Unicode 3616 * character block. 
3617 * @since 20 3618 */ 3619 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H = 3620 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H", 3621 "CJK UNIFIED IDEOGRAPHS EXTENSION H", 3622 "CJKUNIFIEDIDEOGRAPHSEXTENSIONH"); 3623 3624 /** 3625 * Constant for the "CJK Unified Ideographs Extension I" Unicode 3626 * character block. 3627 * @since 22 3628 */ 3629 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I = 3630 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I", 3631 "CJK UNIFIED IDEOGRAPHS EXTENSION I", 3632 "CJKUNIFIEDIDEOGRAPHSEXTENSIONI"); 3633 3634 private static final int[] blockStarts = { 3635 0x0000, // 0000..007F; Basic Latin 3636 0x0080, // 0080..00FF; Latin-1 Supplement 3637 0x0100, // 0100..017F; Latin Extended-A 3638 0x0180, // 0180..024F; Latin Extended-B 3639 0x0250, // 0250..02AF; IPA Extensions 3640 0x02B0, // 02B0..02FF; Spacing Modifier Letters 3641 0x0300, // 0300..036F; Combining Diacritical Marks 3642 0x0370, // 0370..03FF; Greek and Coptic 3643 0x0400, // 0400..04FF; Cyrillic 3644 0x0500, // 0500..052F; Cyrillic Supplement 3645 0x0530, // 0530..058F; Armenian 3646 0x0590, // 0590..05FF; Hebrew 3647 0x0600, // 0600..06FF; Arabic 3648 0x0700, // 0700..074F; Syriac 3649 0x0750, // 0750..077F; Arabic Supplement 3650 0x0780, // 0780..07BF; Thaana 3651 0x07C0, // 07C0..07FF; NKo 3652 0x0800, // 0800..083F; Samaritan 3653 0x0840, // 0840..085F; Mandaic 3654 0x0860, // 0860..086F; Syriac Supplement 3655 0x0870, // 0870..089F; Arabic Extended-B 3656 0x08A0, // 08A0..08FF; Arabic Extended-A 3657 0x0900, // 0900..097F; Devanagari 3658 0x0980, // 0980..09FF; Bengali 3659 0x0A00, // 0A00..0A7F; Gurmukhi 3660 0x0A80, // 0A80..0AFF; Gujarati 3661 0x0B00, // 0B00..0B7F; Oriya 3662 0x0B80, // 0B80..0BFF; Tamil 3663 0x0C00, // 0C00..0C7F; Telugu 3664 0x0C80, // 0C80..0CFF; Kannada 3665 0x0D00, // 0D00..0D7F; Malayalam 3666 0x0D80, // 0D80..0DFF; Sinhala 3667 0x0E00, // 0E00..0E7F; Thai 3668 0x0E80, // 0E80..0EFF; Lao 
3669 0x0F00, // 0F00..0FFF; Tibetan 3670 0x1000, // 1000..109F; Myanmar 3671 0x10A0, // 10A0..10FF; Georgian 3672 0x1100, // 1100..11FF; Hangul Jamo 3673 0x1200, // 1200..137F; Ethiopic 3674 0x1380, // 1380..139F; Ethiopic Supplement 3675 0x13A0, // 13A0..13FF; Cherokee 3676 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics 3677 0x1680, // 1680..169F; Ogham 3678 0x16A0, // 16A0..16FF; Runic 3679 0x1700, // 1700..171F; Tagalog 3680 0x1720, // 1720..173F; Hanunoo 3681 0x1740, // 1740..175F; Buhid 3682 0x1760, // 1760..177F; Tagbanwa 3683 0x1780, // 1780..17FF; Khmer 3684 0x1800, // 1800..18AF; Mongolian 3685 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 3686 0x1900, // 1900..194F; Limbu 3687 0x1950, // 1950..197F; Tai Le 3688 0x1980, // 1980..19DF; New Tai Lue 3689 0x19E0, // 19E0..19FF; Khmer Symbols 3690 0x1A00, // 1A00..1A1F; Buginese 3691 0x1A20, // 1A20..1AAF; Tai Tham 3692 0x1AB0, // 1AB0..1AFF; Combining Diacritical Marks Extended 3693 0x1B00, // 1B00..1B7F; Balinese 3694 0x1B80, // 1B80..1BBF; Sundanese 3695 0x1BC0, // 1BC0..1BFF; Batak 3696 0x1C00, // 1C00..1C4F; Lepcha 3697 0x1C50, // 1C50..1C7F; Ol Chiki 3698 0x1C80, // 1C80..1C8F; Cyrillic Extended-C 3699 0x1C90, // 1C90..1CBF; Georgian Extended 3700 0x1CC0, // 1CC0..1CCF; Sundanese Supplement 3701 0x1CD0, // 1CD0..1CFF; Vedic Extensions 3702 0x1D00, // 1D00..1D7F; Phonetic Extensions 3703 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 3704 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 3705 0x1E00, // 1E00..1EFF; Latin Extended Additional 3706 0x1F00, // 1F00..1FFF; Greek Extended 3707 0x2000, // 2000..206F; General Punctuation 3708 0x2070, // 2070..209F; Superscripts and Subscripts 3709 0x20A0, // 20A0..20CF; Currency Symbols 3710 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols 3711 0x2100, // 2100..214F; Letterlike Symbols 3712 0x2150, // 2150..218F; Number Forms 3713 0x2190, // 2190..21FF; Arrows 3714 0x2200, // 2200..22FF; 
Mathematical Operators 3715 0x2300, // 2300..23FF; Miscellaneous Technical 3716 0x2400, // 2400..243F; Control Pictures 3717 0x2440, // 2440..245F; Optical Character Recognition 3718 0x2460, // 2460..24FF; Enclosed Alphanumerics 3719 0x2500, // 2500..257F; Box Drawing 3720 0x2580, // 2580..259F; Block Elements 3721 0x25A0, // 25A0..25FF; Geometric Shapes 3722 0x2600, // 2600..26FF; Miscellaneous Symbols 3723 0x2700, // 2700..27BF; Dingbats 3724 0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A 3725 0x27F0, // 27F0..27FF; Supplemental Arrows-A 3726 0x2800, // 2800..28FF; Braille Patterns 3727 0x2900, // 2900..297F; Supplemental Arrows-B 3728 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B 3729 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators 3730 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows 3731 0x2C00, // 2C00..2C5F; Glagolitic 3732 0x2C60, // 2C60..2C7F; Latin Extended-C 3733 0x2C80, // 2C80..2CFF; Coptic 3734 0x2D00, // 2D00..2D2F; Georgian Supplement 3735 0x2D30, // 2D30..2D7F; Tifinagh 3736 0x2D80, // 2D80..2DDF; Ethiopic Extended 3737 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A 3738 0x2E00, // 2E00..2E7F; Supplemental Punctuation 3739 0x2E80, // 2E80..2EFF; CJK Radicals Supplement 3740 0x2F00, // 2F00..2FDF; Kangxi Radicals 3741 0x2FE0, // unassigned 3742 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters 3743 0x3000, // 3000..303F; CJK Symbols and Punctuation 3744 0x3040, // 3040..309F; Hiragana 3745 0x30A0, // 30A0..30FF; Katakana 3746 0x3100, // 3100..312F; Bopomofo 3747 0x3130, // 3130..318F; Hangul Compatibility Jamo 3748 0x3190, // 3190..319F; Kanbun 3749 0x31A0, // 31A0..31BF; Bopomofo Extended 3750 0x31C0, // 31C0..31EF; CJK Strokes 3751 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions 3752 0x3200, // 3200..32FF; Enclosed CJK Letters and Months 3753 0x3300, // 3300..33FF; CJK Compatibility 3754 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 3755 0x4DC0, // 4DC0..4DFF; Yijing Hexagram 
Symbols 3756 0x4E00, // 4E00..9FFF; CJK Unified Ideographs 3757 0xA000, // A000..A48F; Yi Syllables 3758 0xA490, // A490..A4CF; Yi Radicals 3759 0xA4D0, // A4D0..A4FF; Lisu 3760 0xA500, // A500..A63F; Vai 3761 0xA640, // A640..A69F; Cyrillic Extended-B 3762 0xA6A0, // A6A0..A6FF; Bamum 3763 0xA700, // A700..A71F; Modifier Tone Letters 3764 0xA720, // A720..A7FF; Latin Extended-D 3765 0xA800, // A800..A82F; Syloti Nagri 3766 0xA830, // A830..A83F; Common Indic Number Forms 3767 0xA840, // A840..A87F; Phags-pa 3768 0xA880, // A880..A8DF; Saurashtra 3769 0xA8E0, // A8E0..A8FF; Devanagari Extended 3770 0xA900, // A900..A92F; Kayah Li 3771 0xA930, // A930..A95F; Rejang 3772 0xA960, // A960..A97F; Hangul Jamo Extended-A 3773 0xA980, // A980..A9DF; Javanese 3774 0xA9E0, // A9E0..A9FF; Myanmar Extended-B 3775 0xAA00, // AA00..AA5F; Cham 3776 0xAA60, // AA60..AA7F; Myanmar Extended-A 3777 0xAA80, // AA80..AADF; Tai Viet 3778 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions 3779 0xAB00, // AB00..AB2F; Ethiopic Extended-A 3780 0xAB30, // AB30..AB6F; Latin Extended-E 3781 0xAB70, // AB70..ABBF; Cherokee Supplement 3782 0xABC0, // ABC0..ABFF; Meetei Mayek 3783 0xAC00, // AC00..D7AF; Hangul Syllables 3784 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B 3785 0xD800, // D800..DB7F; High Surrogates 3786 0xDB80, // DB80..DBFF; High Private Use Surrogates 3787 0xDC00, // DC00..DFFF; Low Surrogates 3788 0xE000, // E000..F8FF; Private Use Area 3789 0xF900, // F900..FAFF; CJK Compatibility Ideographs 3790 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms 3791 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A 3792 0xFE00, // FE00..FE0F; Variation Selectors 3793 0xFE10, // FE10..FE1F; Vertical Forms 3794 0xFE20, // FE20..FE2F; Combining Half Marks 3795 0xFE30, // FE30..FE4F; CJK Compatibility Forms 3796 0xFE50, // FE50..FE6F; Small Form Variants 3797 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B 3798 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms 3799 0xFFF0, // FFF0..FFFF; 
Specials 3800 0x10000, // 10000..1007F; Linear B Syllabary 3801 0x10080, // 10080..100FF; Linear B Ideograms 3802 0x10100, // 10100..1013F; Aegean Numbers 3803 0x10140, // 10140..1018F; Ancient Greek Numbers 3804 0x10190, // 10190..101CF; Ancient Symbols 3805 0x101D0, // 101D0..101FF; Phaistos Disc 3806 0x10200, // unassigned 3807 0x10280, // 10280..1029F; Lycian 3808 0x102A0, // 102A0..102DF; Carian 3809 0x102E0, // 102E0..102FF; Coptic Epact Numbers 3810 0x10300, // 10300..1032F; Old Italic 3811 0x10330, // 10330..1034F; Gothic 3812 0x10350, // 10350..1037F; Old Permic 3813 0x10380, // 10380..1039F; Ugaritic 3814 0x103A0, // 103A0..103DF; Old Persian 3815 0x103E0, // unassigned 3816 0x10400, // 10400..1044F; Deseret 3817 0x10450, // 10450..1047F; Shavian 3818 0x10480, // 10480..104AF; Osmanya 3819 0x104B0, // 104B0..104FF; Osage 3820 0x10500, // 10500..1052F; Elbasan 3821 0x10530, // 10530..1056F; Caucasian Albanian 3822 0x10570, // 10570..105BF; Vithkuqi 3823 0x105C0, // unassigned 3824 0x10600, // 10600..1077F; Linear A 3825 0x10780, // 10780..107BF; Latin Extended-F 3826 0x107C0, // unassigned 3827 0x10800, // 10800..1083F; Cypriot Syllabary 3828 0x10840, // 10840..1085F; Imperial Aramaic 3829 0x10860, // 10860..1087F; Palmyrene 3830 0x10880, // 10880..108AF; Nabataean 3831 0x108B0, // unassigned 3832 0x108E0, // 108E0..108FF; Hatran 3833 0x10900, // 10900..1091F; Phoenician 3834 0x10920, // 10920..1093F; Lydian 3835 0x10940, // unassigned 3836 0x10980, // 10980..1099F; Meroitic Hieroglyphs 3837 0x109A0, // 109A0..109FF; Meroitic Cursive 3838 0x10A00, // 10A00..10A5F; Kharoshthi 3839 0x10A60, // 10A60..10A7F; Old South Arabian 3840 0x10A80, // 10A80..10A9F; Old North Arabian 3841 0x10AA0, // unassigned 3842 0x10AC0, // 10AC0..10AFF; Manichaean 3843 0x10B00, // 10B00..10B3F; Avestan 3844 0x10B40, // 10B40..10B5F; Inscriptional Parthian 3845 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi 3846 0x10B80, // 10B80..10BAF; Psalter Pahlavi 3847 0x10BB0, // unassigned 
3848 0x10C00, // 10C00..10C4F; Old Turkic 3849 0x10C50, // unassigned 3850 0x10C80, // 10C80..10CFF; Old Hungarian 3851 0x10D00, // 10D00..10D3F; Hanifi Rohingya 3852 0x10D40, // unassigned 3853 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols 3854 0x10E80, // 10E80..10EBF; Yezidi 3855 0x10EC0, // 10EC0..10EFF; Arabic Extended-C 3856 0x10F00, // 10F00..10F2F; Old Sogdian 3857 0x10F30, // 10F30..10F6F; Sogdian 3858 0x10F70, // 10F70..10FAF; Old Uyghur 3859 0x10FB0, // 10FB0..10FDF; Chorasmian 3860 0x10FE0, // 10FE0..10FFF; Elymaic 3861 0x11000, // 11000..1107F; Brahmi 3862 0x11080, // 11080..110CF; Kaithi 3863 0x110D0, // 110D0..110FF; Sora Sompeng 3864 0x11100, // 11100..1114F; Chakma 3865 0x11150, // 11150..1117F; Mahajani 3866 0x11180, // 11180..111DF; Sharada 3867 0x111E0, // 111E0..111FF; Sinhala Archaic Numbers 3868 0x11200, // 11200..1124F; Khojki 3869 0x11250, // unassigned 3870 0x11280, // 11280..112AF; Multani 3871 0x112B0, // 112B0..112FF; Khudawadi 3872 0x11300, // 11300..1137F; Grantha 3873 0x11380, // unassigned 3874 0x11400, // 11400..1147F; Newa 3875 0x11480, // 11480..114DF; Tirhuta 3876 0x114E0, // unassigned 3877 0x11580, // 11580..115FF; Siddham 3878 0x11600, // 11600..1165F; Modi 3879 0x11660, // 11660..1167F; Mongolian Supplement 3880 0x11680, // 11680..116CF; Takri 3881 0x116D0, // unassigned 3882 0x11700, // 11700..1174F; Ahom 3883 0x11750, // unassigned 3884 0x11800, // 11800..1184F; Dogra 3885 0x11850, // unassigned 3886 0x118A0, // 118A0..118FF; Warang Citi 3887 0x11900, // 11900..1195F; Dives Akuru 3888 0x11960, // unassigned 3889 0x119A0, // 119A0..119FF; Nandinagari 3890 0x11A00, // 11A00..11A4F; Zanabazar Square 3891 0x11A50, // 11A50..11AAF; Soyombo 3892 0x11AB0, // 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A 3893 0x11AC0, // 11AC0..11AFF; Pau Cin Hau 3894 0x11B00, // 11B00..11B5F; Devanagari Extended-A 3895 0x11B60, // unassigned 3896 0x11C00, // 11C00..11C6F; Bhaiksuki 3897 0x11C70, // 11C70..11CBF; Marchen 3898 
0x11CC0, // unassigned 3899 0x11D00, // 11D00..11D5F; Masaram Gondi 3900 0x11D60, // 11D60..11DAF; Gunjala Gondi 3901 0x11DB0, // unassigned 3902 0x11EE0, // 11EE0..11EFF; Makasar 3903 0x11F00, // 11F00..11F5F; Kawi 3904 0x11F60, // unassigned 3905 0x11FB0, // 11FB0..11FBF; Lisu Supplement 3906 0x11FC0, // 11FC0..11FFF; Tamil Supplement 3907 0x12000, // 12000..123FF; Cuneiform 3908 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation 3909 0x12480, // 12480..1254F; Early Dynastic Cuneiform 3910 0x12550, // unassigned 3911 0x12F90, // 12F90..12FFF; Cypro-Minoan 3912 0x13000, // 13000..1342F; Egyptian Hieroglyphs 3913 0x13430, // 13430..1345F; Egyptian Hieroglyph Format Controls 3914 0x13460, // unassigned 3915 0x14400, // 14400..1467F; Anatolian Hieroglyphs 3916 0x14680, // unassigned 3917 0x16800, // 16800..16A3F; Bamum Supplement 3918 0x16A40, // 16A40..16A6F; Mro 3919 0x16A70, // 16A70..16ACF; Tangsa 3920 0x16AD0, // 16AD0..16AFF; Bassa Vah 3921 0x16B00, // 16B00..16B8F; Pahawh Hmong 3922 0x16B90, // unassigned 3923 0x16E40, // 16E40..16E9F; Medefaidrin 3924 0x16EA0, // unassigned 3925 0x16F00, // 16F00..16F9F; Miao 3926 0x16FA0, // unassigned 3927 0x16FE0, // 16FE0..16FFF; Ideographic Symbols and Punctuation 3928 0x17000, // 17000..187FF; Tangut 3929 0x18800, // 18800..18AFF; Tangut Components 3930 0x18B00, // 18B00..18CFF; Khitan Small Script 3931 0x18D00, // 18D00..18D7F; Tangut Supplement 3932 0x18D80, // unassigned 3933 0x1AFF0, // 1AFF0..1AFFF; Kana Extended-B 3934 0x1B000, // 1B000..1B0FF; Kana Supplement 3935 0x1B100, // 1B100..1B12F; Kana Extended-A 3936 0x1B130, // 1B130..1B16F; Small Kana Extension 3937 0x1B170, // 1B170..1B2FF; Nushu 3938 0x1B300, // unassigned 3939 0x1BC00, // 1BC00..1BC9F; Duployan 3940 0x1BCA0, // 1BCA0..1BCAF; Shorthand Format Controls 3941 0x1BCB0, // unassigned 3942 0x1CF00, // 1CF00..1CFCF; Znamenny Musical Notation 3943 0x1CFD0, // unassigned 3944 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 3945 0x1D100, // 
1D100..1D1FF; Musical Symbols 3946 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation 3947 0x1D250, // unassigned 3948 0x1D2C0, // 1D2C0..1D2DF; Kaktovik Numerals 3949 0x1D2E0, // 1D2E0..1D2FF; Mayan Numerals 3950 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols 3951 0x1D360, // 1D360..1D37F; Counting Rod Numerals 3952 0x1D380, // unassigned 3953 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols 3954 0x1D800, // 1D800..1DAAF; Sutton SignWriting 3955 0x1DAB0, // unassigned 3956 0x1DF00, // 1DF00..1DFFF; Latin Extended-G 3957 0x1E000, // 1E000..1E02F; Glagolitic Supplement 3958 0x1E030, // 1E030..1E08F; Cyrillic Extended-D 3959 0x1E090, // unassigned 3960 0x1E100, // 1E100..1E14F; Nyiakeng Puachue Hmong 3961 0x1E150, // unassigned 3962 0x1E290, // 1E290..1E2BF; Toto 3963 0x1E2C0, // 1E2C0..1E2FF; Wancho 3964 0x1E300, // unassigned 3965 0x1E4D0, // 1E4D0..1E4FF; Nag Mundari 3966 0x1E500, // unassigned 3967 0x1E7E0, // 1E7E0..1E7FF; Ethiopic Extended-B 3968 0x1E800, // 1E800..1E8DF; Mende Kikakui 3969 0x1E8E0, // unassigned 3970 0x1E900, // 1E900..1E95F; Adlam 3971 0x1E960, // unassigned 3972 0x1EC70, // 1EC70..1ECBF; Indic Siyaq Numbers 3973 0x1ECC0, // unassigned 3974 0x1ED00, // 1ED00..1ED4F; Ottoman Siyaq Numbers 3975 0x1ED50, // unassigned 3976 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 3977 0x1EF00, // unassigned 3978 0x1F000, // 1F000..1F02F; Mahjong Tiles 3979 0x1F030, // 1F030..1F09F; Domino Tiles 3980 0x1F0A0, // 1F0A0..1F0FF; Playing Cards 3981 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement 3982 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement 3983 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols and Pictographs 3984 0x1F600, // 1F600..1F64F; Emoticons 3985 0x1F650, // 1F650..1F67F; Ornamental Dingbats 3986 0x1F680, // 1F680..1F6FF; Transport and Map Symbols 3987 0x1F700, // 1F700..1F77F; Alchemical Symbols 3988 0x1F780, // 1F780..1F7FF; Geometric Shapes Extended 3989 0x1F800, // 1F800..1F8FF; 
Supplemental Arrows-C 3990 0x1F900, // 1F900..1F9FF; Supplemental Symbols and Pictographs 3991 0x1FA00, // 1FA00..1FA6F; Chess Symbols 3992 0x1FA70, // 1FA70..1FAFF; Symbols and Pictographs Extended-A 3993 0x1FB00, // 1FB00..1FBFF; Symbols for Legacy Computing 3994 0x1FC00, // unassigned 3995 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B 3996 0x2A6E0, // unassigned 3997 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C 3998 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D 3999 0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E 4000 0x2CEB0, // 2CEB0..2EBEF; CJK Unified Ideographs Extension F 4001 0x2EBF0, // 2EBF0..2EE5F; CJK Unified Ideographs Extension I 4002 0x2EE60, // unassigned 4003 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement 4004 0x2FA20, // unassigned 4005 0x30000, // 30000..3134F; CJK Unified Ideographs Extension G 4006 0x31350, // 31350..323AF; CJK Unified Ideographs Extension H 4007 0x323B0, // unassigned 4008 0xE0000, // E0000..E007F; Tags 4009 0xE0080, // unassigned 4010 0xE0100, // E0100..E01EF; Variation Selectors Supplement 4011 0xE01F0, // unassigned 4012 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A 4013 0x100000, // 100000..10FFFF; Supplementary Private Use Area-B 4014 }; 4015 4016 private static final UnicodeBlock[] blocks = { 4017 BASIC_LATIN, 4018 LATIN_1_SUPPLEMENT, 4019 LATIN_EXTENDED_A, 4020 LATIN_EXTENDED_B, 4021 IPA_EXTENSIONS, 4022 SPACING_MODIFIER_LETTERS, 4023 COMBINING_DIACRITICAL_MARKS, 4024 GREEK, 4025 CYRILLIC, 4026 CYRILLIC_SUPPLEMENTARY, 4027 ARMENIAN, 4028 HEBREW, 4029 ARABIC, 4030 SYRIAC, 4031 ARABIC_SUPPLEMENT, 4032 THAANA, 4033 NKO, 4034 SAMARITAN, 4035 MANDAIC, 4036 SYRIAC_SUPPLEMENT, 4037 ARABIC_EXTENDED_B, 4038 ARABIC_EXTENDED_A, 4039 DEVANAGARI, 4040 BENGALI, 4041 GURMUKHI, 4042 GUJARATI, 4043 ORIYA, 4044 TAMIL, 4045 TELUGU, 4046 KANNADA, 4047 MALAYALAM, 4048 SINHALA, 4049 THAI, 4050 LAO, 4051 TIBETAN, 4052 MYANMAR, 4053 GEORGIAN, 4054 
HANGUL_JAMO, 4055 ETHIOPIC, 4056 ETHIOPIC_SUPPLEMENT, 4057 CHEROKEE, 4058 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 4059 OGHAM, 4060 RUNIC, 4061 TAGALOG, 4062 HANUNOO, 4063 BUHID, 4064 TAGBANWA, 4065 KHMER, 4066 MONGOLIAN, 4067 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 4068 LIMBU, 4069 TAI_LE, 4070 NEW_TAI_LUE, 4071 KHMER_SYMBOLS, 4072 BUGINESE, 4073 TAI_THAM, 4074 COMBINING_DIACRITICAL_MARKS_EXTENDED, 4075 BALINESE, 4076 SUNDANESE, 4077 BATAK, 4078 LEPCHA, 4079 OL_CHIKI, 4080 CYRILLIC_EXTENDED_C, 4081 GEORGIAN_EXTENDED, 4082 SUNDANESE_SUPPLEMENT, 4083 VEDIC_EXTENSIONS, 4084 PHONETIC_EXTENSIONS, 4085 PHONETIC_EXTENSIONS_SUPPLEMENT, 4086 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 4087 LATIN_EXTENDED_ADDITIONAL, 4088 GREEK_EXTENDED, 4089 GENERAL_PUNCTUATION, 4090 SUPERSCRIPTS_AND_SUBSCRIPTS, 4091 CURRENCY_SYMBOLS, 4092 COMBINING_MARKS_FOR_SYMBOLS, 4093 LETTERLIKE_SYMBOLS, 4094 NUMBER_FORMS, 4095 ARROWS, 4096 MATHEMATICAL_OPERATORS, 4097 MISCELLANEOUS_TECHNICAL, 4098 CONTROL_PICTURES, 4099 OPTICAL_CHARACTER_RECOGNITION, 4100 ENCLOSED_ALPHANUMERICS, 4101 BOX_DRAWING, 4102 BLOCK_ELEMENTS, 4103 GEOMETRIC_SHAPES, 4104 MISCELLANEOUS_SYMBOLS, 4105 DINGBATS, 4106 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 4107 SUPPLEMENTAL_ARROWS_A, 4108 BRAILLE_PATTERNS, 4109 SUPPLEMENTAL_ARROWS_B, 4110 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 4111 SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 4112 MISCELLANEOUS_SYMBOLS_AND_ARROWS, 4113 GLAGOLITIC, 4114 LATIN_EXTENDED_C, 4115 COPTIC, 4116 GEORGIAN_SUPPLEMENT, 4117 TIFINAGH, 4118 ETHIOPIC_EXTENDED, 4119 CYRILLIC_EXTENDED_A, 4120 SUPPLEMENTAL_PUNCTUATION, 4121 CJK_RADICALS_SUPPLEMENT, 4122 KANGXI_RADICALS, 4123 null, 4124 IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 4125 CJK_SYMBOLS_AND_PUNCTUATION, 4126 HIRAGANA, 4127 KATAKANA, 4128 BOPOMOFO, 4129 HANGUL_COMPATIBILITY_JAMO, 4130 KANBUN, 4131 BOPOMOFO_EXTENDED, 4132 CJK_STROKES, 4133 KATAKANA_PHONETIC_EXTENSIONS, 4134 ENCLOSED_CJK_LETTERS_AND_MONTHS, 4135 CJK_COMPATIBILITY, 4136 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 
4137 YIJING_HEXAGRAM_SYMBOLS, 4138 CJK_UNIFIED_IDEOGRAPHS, 4139 YI_SYLLABLES, 4140 YI_RADICALS, 4141 LISU, 4142 VAI, 4143 CYRILLIC_EXTENDED_B, 4144 BAMUM, 4145 MODIFIER_TONE_LETTERS, 4146 LATIN_EXTENDED_D, 4147 SYLOTI_NAGRI, 4148 COMMON_INDIC_NUMBER_FORMS, 4149 PHAGS_PA, 4150 SAURASHTRA, 4151 DEVANAGARI_EXTENDED, 4152 KAYAH_LI, 4153 REJANG, 4154 HANGUL_JAMO_EXTENDED_A, 4155 JAVANESE, 4156 MYANMAR_EXTENDED_B, 4157 CHAM, 4158 MYANMAR_EXTENDED_A, 4159 TAI_VIET, 4160 MEETEI_MAYEK_EXTENSIONS, 4161 ETHIOPIC_EXTENDED_A, 4162 LATIN_EXTENDED_E, 4163 CHEROKEE_SUPPLEMENT, 4164 MEETEI_MAYEK, 4165 HANGUL_SYLLABLES, 4166 HANGUL_JAMO_EXTENDED_B, 4167 HIGH_SURROGATES, 4168 HIGH_PRIVATE_USE_SURROGATES, 4169 LOW_SURROGATES, 4170 PRIVATE_USE_AREA, 4171 CJK_COMPATIBILITY_IDEOGRAPHS, 4172 ALPHABETIC_PRESENTATION_FORMS, 4173 ARABIC_PRESENTATION_FORMS_A, 4174 VARIATION_SELECTORS, 4175 VERTICAL_FORMS, 4176 COMBINING_HALF_MARKS, 4177 CJK_COMPATIBILITY_FORMS, 4178 SMALL_FORM_VARIANTS, 4179 ARABIC_PRESENTATION_FORMS_B, 4180 HALFWIDTH_AND_FULLWIDTH_FORMS, 4181 SPECIALS, 4182 LINEAR_B_SYLLABARY, 4183 LINEAR_B_IDEOGRAMS, 4184 AEGEAN_NUMBERS, 4185 ANCIENT_GREEK_NUMBERS, 4186 ANCIENT_SYMBOLS, 4187 PHAISTOS_DISC, 4188 null, 4189 LYCIAN, 4190 CARIAN, 4191 COPTIC_EPACT_NUMBERS, 4192 OLD_ITALIC, 4193 GOTHIC, 4194 OLD_PERMIC, 4195 UGARITIC, 4196 OLD_PERSIAN, 4197 null, 4198 DESERET, 4199 SHAVIAN, 4200 OSMANYA, 4201 OSAGE, 4202 ELBASAN, 4203 CAUCASIAN_ALBANIAN, 4204 VITHKUQI, 4205 null, 4206 LINEAR_A, 4207 LATIN_EXTENDED_F, 4208 null, 4209 CYPRIOT_SYLLABARY, 4210 IMPERIAL_ARAMAIC, 4211 PALMYRENE, 4212 NABATAEAN, 4213 null, 4214 HATRAN, 4215 PHOENICIAN, 4216 LYDIAN, 4217 null, 4218 MEROITIC_HIEROGLYPHS, 4219 MEROITIC_CURSIVE, 4220 KHAROSHTHI, 4221 OLD_SOUTH_ARABIAN, 4222 OLD_NORTH_ARABIAN, 4223 null, 4224 MANICHAEAN, 4225 AVESTAN, 4226 INSCRIPTIONAL_PARTHIAN, 4227 INSCRIPTIONAL_PAHLAVI, 4228 PSALTER_PAHLAVI, 4229 null, 4230 OLD_TURKIC, 4231 null, 4232 OLD_HUNGARIAN, 4233 HANIFI_ROHINGYA, 4234 null, 4235 
RUMI_NUMERAL_SYMBOLS, 4236 YEZIDI, 4237 ARABIC_EXTENDED_C, 4238 OLD_SOGDIAN, 4239 SOGDIAN, 4240 OLD_UYGHUR, 4241 CHORASMIAN, 4242 ELYMAIC, 4243 BRAHMI, 4244 KAITHI, 4245 SORA_SOMPENG, 4246 CHAKMA, 4247 MAHAJANI, 4248 SHARADA, 4249 SINHALA_ARCHAIC_NUMBERS, 4250 KHOJKI, 4251 null, 4252 MULTANI, 4253 KHUDAWADI, 4254 GRANTHA, 4255 null, 4256 NEWA, 4257 TIRHUTA, 4258 null, 4259 SIDDHAM, 4260 MODI, 4261 MONGOLIAN_SUPPLEMENT, 4262 TAKRI, 4263 null, 4264 AHOM, 4265 null, 4266 DOGRA, 4267 null, 4268 WARANG_CITI, 4269 DIVES_AKURU, 4270 null, 4271 NANDINAGARI, 4272 ZANABAZAR_SQUARE, 4273 SOYOMBO, 4274 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A, 4275 PAU_CIN_HAU, 4276 DEVANAGARI_EXTENDED_A, 4277 null, 4278 BHAIKSUKI, 4279 MARCHEN, 4280 null, 4281 MASARAM_GONDI, 4282 GUNJALA_GONDI, 4283 null, 4284 MAKASAR, 4285 KAWI, 4286 null, 4287 LISU_SUPPLEMENT, 4288 TAMIL_SUPPLEMENT, 4289 CUNEIFORM, 4290 CUNEIFORM_NUMBERS_AND_PUNCTUATION, 4291 EARLY_DYNASTIC_CUNEIFORM, 4292 null, 4293 CYPRO_MINOAN, 4294 EGYPTIAN_HIEROGLYPHS, 4295 EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS, 4296 null, 4297 ANATOLIAN_HIEROGLYPHS, 4298 null, 4299 BAMUM_SUPPLEMENT, 4300 MRO, 4301 TANGSA, 4302 BASSA_VAH, 4303 PAHAWH_HMONG, 4304 null, 4305 MEDEFAIDRIN, 4306 null, 4307 MIAO, 4308 null, 4309 IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION, 4310 TANGUT, 4311 TANGUT_COMPONENTS, 4312 KHITAN_SMALL_SCRIPT, 4313 TANGUT_SUPPLEMENT, 4314 null, 4315 KANA_EXTENDED_B, 4316 KANA_SUPPLEMENT, 4317 KANA_EXTENDED_A, 4318 SMALL_KANA_EXTENSION, 4319 NUSHU, 4320 null, 4321 DUPLOYAN, 4322 SHORTHAND_FORMAT_CONTROLS, 4323 null, 4324 ZNAMENNY_MUSICAL_NOTATION, 4325 null, 4326 BYZANTINE_MUSICAL_SYMBOLS, 4327 MUSICAL_SYMBOLS, 4328 ANCIENT_GREEK_MUSICAL_NOTATION, 4329 null, 4330 KAKTOVIK_NUMERALS, 4331 MAYAN_NUMERALS, 4332 TAI_XUAN_JING_SYMBOLS, 4333 COUNTING_ROD_NUMERALS, 4334 null, 4335 MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 4336 SUTTON_SIGNWRITING, 4337 null, 4338 LATIN_EXTENDED_G, 4339 GLAGOLITIC_SUPPLEMENT, 4340 CYRILLIC_EXTENDED_D, 4341 null, 4342 
NYIAKENG_PUACHUE_HMONG, 4343 null, 4344 TOTO, 4345 WANCHO, 4346 null, 4347 NAG_MUNDARI, 4348 null, 4349 ETHIOPIC_EXTENDED_B, 4350 MENDE_KIKAKUI, 4351 null, 4352 ADLAM, 4353 null, 4354 INDIC_SIYAQ_NUMBERS, 4355 null, 4356 OTTOMAN_SIYAQ_NUMBERS, 4357 null, 4358 ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, 4359 null, 4360 MAHJONG_TILES, 4361 DOMINO_TILES, 4362 PLAYING_CARDS, 4363 ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 4364 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 4365 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 4366 EMOTICONS, 4367 ORNAMENTAL_DINGBATS, 4368 TRANSPORT_AND_MAP_SYMBOLS, 4369 ALCHEMICAL_SYMBOLS, 4370 GEOMETRIC_SHAPES_EXTENDED, 4371 SUPPLEMENTAL_ARROWS_C, 4372 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS, 4373 CHESS_SYMBOLS, 4374 SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A, 4375 SYMBOLS_FOR_LEGACY_COMPUTING, 4376 null, 4377 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 4378 null, 4379 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 4380 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 4381 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E, 4382 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F, 4383 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I, 4384 null, 4385 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 4386 null, 4387 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G, 4388 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H, 4389 null, 4390 TAGS, 4391 null, 4392 VARIATION_SELECTORS_SUPPLEMENT, 4393 null, 4394 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 4395 SUPPLEMENTARY_PRIVATE_USE_AREA_B, 4396 }; 4397 4398 4399 /** 4400 * Returns the object representing the Unicode block containing the 4401 * given character, or {@code null} if the character is not a 4402 * member of a defined block. 4403 * 4404 * <p><b>Note:</b> This method cannot handle 4405 * <a href="Character.html#supplementary"> supplementary 4406 * characters</a>. To support all Unicode characters, including 4407 * supplementary characters, use the {@link #of(int)} method. 
4408 * 4409 * @param c The character in question 4410 * @return The {@code UnicodeBlock} instance representing the 4411 * Unicode block of which this character is a member, or 4412 * {@code null} if the character is not a member of any 4413 * Unicode block 4414 */ 4415 public static UnicodeBlock of(char c) { 4416 return of((int)c); 4417 } 4418 4419 /** 4420 * Returns the object representing the Unicode block 4421 * containing the given character (Unicode code point), or 4422 * {@code null} if the character is not a member of a 4423 * defined block. 4424 * 4425 * @param codePoint the character (Unicode code point) in question. 4426 * @return The {@code UnicodeBlock} instance representing the 4427 * Unicode block of which this character is a member, or 4428 * {@code null} if the character is not a member of any 4429 * Unicode block 4430 * @throws IllegalArgumentException if the specified 4431 * {@code codePoint} is an invalid Unicode code point. 4432 * @see Character#isValidCodePoint(int) 4433 * @since 1.5 4434 */ 4435 public static UnicodeBlock of(int codePoint) { 4436 if (!isValidCodePoint(codePoint)) { 4437 throw new IllegalArgumentException( 4438 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 4439 } 4440 4441 int top, bottom, current; 4442 bottom = 0; 4443 top = blockStarts.length; 4444 current = top/2; 4445 4446 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 4447 while (top - bottom > 1) { 4448 if (codePoint >= blockStarts[current]) { 4449 bottom = current; 4450 } else { 4451 top = current; 4452 } 4453 current = (top + bottom) / 2; 4454 } 4455 return blocks[current]; 4456 } 4457 4458 /** 4459 * Returns the UnicodeBlock with the given name. Block 4460 * names are determined by The Unicode Standard. The file 4461 * {@code Blocks.txt} defines blocks for a particular 4462 * version of the standard. The {@link Character} class specifies 4463 * the version of the standard that it supports. 
4464 * <p> 4465 * This method accepts block names in the following forms: 4466 * <ol> 4467 * <li> Canonical block names as defined by the Unicode Standard. 4468 * For example, the standard defines a "Basic Latin" block. Therefore, this 4469 * method accepts "Basic Latin" as a valid block name. The documentation of 4470 * each UnicodeBlock provides the canonical name. 4471 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 4472 * is a valid block name for the "Basic Latin" block. 4473 * <li>The text representation of each constant UnicodeBlock identifier. 4474 * For example, this method will return the {@link #BASIC_LATIN} block if 4475 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 4476 * hyphens in the canonical name with underscores. 4477 * </ol> 4478 * Finally, character case is ignored for all of the valid block name forms. 4479 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 4480 * The en_US locale's case mapping rules are used to provide case-insensitive 4481 * string comparisons for block name validation. 4482 * <p> 4483 * If the Unicode Standard changes block names, both the previous and 4484 * current names will be accepted. 4485 * 4486 * @param blockName A {@code UnicodeBlock} name. 
4487 * @return The {@code UnicodeBlock} instance identified 4488 * by {@code blockName} 4489 * @throws IllegalArgumentException if {@code blockName} is an 4490 * invalid name 4491 * @throws NullPointerException if {@code blockName} is null 4492 * @since 1.5 4493 */ 4494 public static final UnicodeBlock forName(String blockName) { 4495 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 4496 if (block == null) { 4497 throw new IllegalArgumentException("Not a valid block name: " 4498 + blockName); 4499 } 4500 return block; 4501 } 4502 } 4503 4504 4505 /** 4506 * A family of character subsets representing the character scripts 4507 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 4508 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 4509 * character is assigned to a single Unicode script, either a specific 4510 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 4511 * one of the following three special values, 4512 * {@link Character.UnicodeScript#INHERITED Inherited}, 4513 * {@link Character.UnicodeScript#COMMON Common} or 4514 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 4515 * 4516 * @spec https://www.unicode.org/reports/tr24 Unicode Script Property 4517 * @since 1.7 4518 */ 4519 public static enum UnicodeScript { 4520 /** 4521 * Unicode script "Common". 4522 */ 4523 COMMON, 4524 4525 /** 4526 * Unicode script "Latin". 4527 */ 4528 LATIN, 4529 4530 /** 4531 * Unicode script "Greek". 4532 */ 4533 GREEK, 4534 4535 /** 4536 * Unicode script "Cyrillic". 4537 */ 4538 CYRILLIC, 4539 4540 /** 4541 * Unicode script "Armenian". 4542 */ 4543 ARMENIAN, 4544 4545 /** 4546 * Unicode script "Hebrew". 4547 */ 4548 HEBREW, 4549 4550 /** 4551 * Unicode script "Arabic". 4552 */ 4553 ARABIC, 4554 4555 /** 4556 * Unicode script "Syriac". 4557 */ 4558 SYRIAC, 4559 4560 /** 4561 * Unicode script "Thaana". 4562 */ 4563 THAANA, 4564 4565 /** 4566 * Unicode script "Devanagari". 
4567 */ 4568 DEVANAGARI, 4569 4570 /** 4571 * Unicode script "Bengali". 4572 */ 4573 BENGALI, 4574 4575 /** 4576 * Unicode script "Gurmukhi". 4577 */ 4578 GURMUKHI, 4579 4580 /** 4581 * Unicode script "Gujarati". 4582 */ 4583 GUJARATI, 4584 4585 /** 4586 * Unicode script "Oriya". 4587 */ 4588 ORIYA, 4589 4590 /** 4591 * Unicode script "Tamil". 4592 */ 4593 TAMIL, 4594 4595 /** 4596 * Unicode script "Telugu". 4597 */ 4598 TELUGU, 4599 4600 /** 4601 * Unicode script "Kannada". 4602 */ 4603 KANNADA, 4604 4605 /** 4606 * Unicode script "Malayalam". 4607 */ 4608 MALAYALAM, 4609 4610 /** 4611 * Unicode script "Sinhala". 4612 */ 4613 SINHALA, 4614 4615 /** 4616 * Unicode script "Thai". 4617 */ 4618 THAI, 4619 4620 /** 4621 * Unicode script "Lao". 4622 */ 4623 LAO, 4624 4625 /** 4626 * Unicode script "Tibetan". 4627 */ 4628 TIBETAN, 4629 4630 /** 4631 * Unicode script "Myanmar". 4632 */ 4633 MYANMAR, 4634 4635 /** 4636 * Unicode script "Georgian". 4637 */ 4638 GEORGIAN, 4639 4640 /** 4641 * Unicode script "Hangul". 4642 */ 4643 HANGUL, 4644 4645 /** 4646 * Unicode script "Ethiopic". 4647 */ 4648 ETHIOPIC, 4649 4650 /** 4651 * Unicode script "Cherokee". 4652 */ 4653 CHEROKEE, 4654 4655 /** 4656 * Unicode script "Canadian_Aboriginal". 4657 */ 4658 CANADIAN_ABORIGINAL, 4659 4660 /** 4661 * Unicode script "Ogham". 4662 */ 4663 OGHAM, 4664 4665 /** 4666 * Unicode script "Runic". 4667 */ 4668 RUNIC, 4669 4670 /** 4671 * Unicode script "Khmer". 4672 */ 4673 KHMER, 4674 4675 /** 4676 * Unicode script "Mongolian". 4677 */ 4678 MONGOLIAN, 4679 4680 /** 4681 * Unicode script "Hiragana". 4682 */ 4683 HIRAGANA, 4684 4685 /** 4686 * Unicode script "Katakana". 4687 */ 4688 KATAKANA, 4689 4690 /** 4691 * Unicode script "Bopomofo". 4692 */ 4693 BOPOMOFO, 4694 4695 /** 4696 * Unicode script "Han". 4697 */ 4698 HAN, 4699 4700 /** 4701 * Unicode script "Yi". 4702 */ 4703 YI, 4704 4705 /** 4706 * Unicode script "Old_Italic". 
4707 */ 4708 OLD_ITALIC, 4709 4710 /** 4711 * Unicode script "Gothic". 4712 */ 4713 GOTHIC, 4714 4715 /** 4716 * Unicode script "Deseret". 4717 */ 4718 DESERET, 4719 4720 /** 4721 * Unicode script "Inherited". 4722 */ 4723 INHERITED, 4724 4725 /** 4726 * Unicode script "Tagalog". 4727 */ 4728 TAGALOG, 4729 4730 /** 4731 * Unicode script "Hanunoo". 4732 */ 4733 HANUNOO, 4734 4735 /** 4736 * Unicode script "Buhid". 4737 */ 4738 BUHID, 4739 4740 /** 4741 * Unicode script "Tagbanwa". 4742 */ 4743 TAGBANWA, 4744 4745 /** 4746 * Unicode script "Limbu". 4747 */ 4748 LIMBU, 4749 4750 /** 4751 * Unicode script "Tai_Le". 4752 */ 4753 TAI_LE, 4754 4755 /** 4756 * Unicode script "Linear_B". 4757 */ 4758 LINEAR_B, 4759 4760 /** 4761 * Unicode script "Ugaritic". 4762 */ 4763 UGARITIC, 4764 4765 /** 4766 * Unicode script "Shavian". 4767 */ 4768 SHAVIAN, 4769 4770 /** 4771 * Unicode script "Osmanya". 4772 */ 4773 OSMANYA, 4774 4775 /** 4776 * Unicode script "Cypriot". 4777 */ 4778 CYPRIOT, 4779 4780 /** 4781 * Unicode script "Braille". 4782 */ 4783 BRAILLE, 4784 4785 /** 4786 * Unicode script "Buginese". 4787 */ 4788 BUGINESE, 4789 4790 /** 4791 * Unicode script "Coptic". 4792 */ 4793 COPTIC, 4794 4795 /** 4796 * Unicode script "New_Tai_Lue". 4797 */ 4798 NEW_TAI_LUE, 4799 4800 /** 4801 * Unicode script "Glagolitic". 4802 */ 4803 GLAGOLITIC, 4804 4805 /** 4806 * Unicode script "Tifinagh". 4807 */ 4808 TIFINAGH, 4809 4810 /** 4811 * Unicode script "Syloti_Nagri". 4812 */ 4813 SYLOTI_NAGRI, 4814 4815 /** 4816 * Unicode script "Old_Persian". 4817 */ 4818 OLD_PERSIAN, 4819 4820 /** 4821 * Unicode script "Kharoshthi". 4822 */ 4823 KHAROSHTHI, 4824 4825 /** 4826 * Unicode script "Balinese". 4827 */ 4828 BALINESE, 4829 4830 /** 4831 * Unicode script "Cuneiform". 4832 */ 4833 CUNEIFORM, 4834 4835 /** 4836 * Unicode script "Phoenician". 4837 */ 4838 PHOENICIAN, 4839 4840 /** 4841 * Unicode script "Phags_Pa". 4842 */ 4843 PHAGS_PA, 4844 4845 /** 4846 * Unicode script "Nko". 
4847 */ 4848 NKO, 4849 4850 /** 4851 * Unicode script "Sundanese". 4852 */ 4853 SUNDANESE, 4854 4855 /** 4856 * Unicode script "Batak". 4857 */ 4858 BATAK, 4859 4860 /** 4861 * Unicode script "Lepcha". 4862 */ 4863 LEPCHA, 4864 4865 /** 4866 * Unicode script "Ol_Chiki". 4867 */ 4868 OL_CHIKI, 4869 4870 /** 4871 * Unicode script "Vai". 4872 */ 4873 VAI, 4874 4875 /** 4876 * Unicode script "Saurashtra". 4877 */ 4878 SAURASHTRA, 4879 4880 /** 4881 * Unicode script "Kayah_Li". 4882 */ 4883 KAYAH_LI, 4884 4885 /** 4886 * Unicode script "Rejang". 4887 */ 4888 REJANG, 4889 4890 /** 4891 * Unicode script "Lycian". 4892 */ 4893 LYCIAN, 4894 4895 /** 4896 * Unicode script "Carian". 4897 */ 4898 CARIAN, 4899 4900 /** 4901 * Unicode script "Lydian". 4902 */ 4903 LYDIAN, 4904 4905 /** 4906 * Unicode script "Cham". 4907 */ 4908 CHAM, 4909 4910 /** 4911 * Unicode script "Tai_Tham". 4912 */ 4913 TAI_THAM, 4914 4915 /** 4916 * Unicode script "Tai_Viet". 4917 */ 4918 TAI_VIET, 4919 4920 /** 4921 * Unicode script "Avestan". 4922 */ 4923 AVESTAN, 4924 4925 /** 4926 * Unicode script "Egyptian_Hieroglyphs". 4927 */ 4928 EGYPTIAN_HIEROGLYPHS, 4929 4930 /** 4931 * Unicode script "Samaritan". 4932 */ 4933 SAMARITAN, 4934 4935 /** 4936 * Unicode script "Mandaic". 4937 */ 4938 MANDAIC, 4939 4940 /** 4941 * Unicode script "Lisu". 4942 */ 4943 LISU, 4944 4945 /** 4946 * Unicode script "Bamum". 4947 */ 4948 BAMUM, 4949 4950 /** 4951 * Unicode script "Javanese". 4952 */ 4953 JAVANESE, 4954 4955 /** 4956 * Unicode script "Meetei_Mayek". 4957 */ 4958 MEETEI_MAYEK, 4959 4960 /** 4961 * Unicode script "Imperial_Aramaic". 4962 */ 4963 IMPERIAL_ARAMAIC, 4964 4965 /** 4966 * Unicode script "Old_South_Arabian". 4967 */ 4968 OLD_SOUTH_ARABIAN, 4969 4970 /** 4971 * Unicode script "Inscriptional_Parthian". 4972 */ 4973 INSCRIPTIONAL_PARTHIAN, 4974 4975 /** 4976 * Unicode script "Inscriptional_Pahlavi". 4977 */ 4978 INSCRIPTIONAL_PAHLAVI, 4979 4980 /** 4981 * Unicode script "Old_Turkic". 
4982 */ 4983 OLD_TURKIC, 4984 4985 /** 4986 * Unicode script "Brahmi". 4987 */ 4988 BRAHMI, 4989 4990 /** 4991 * Unicode script "Kaithi". 4992 */ 4993 KAITHI, 4994 4995 /** 4996 * Unicode script "Meroitic Hieroglyphs". 4997 * @since 1.8 4998 */ 4999 MEROITIC_HIEROGLYPHS, 5000 5001 /** 5002 * Unicode script "Meroitic Cursive". 5003 * @since 1.8 5004 */ 5005 MEROITIC_CURSIVE, 5006 5007 /** 5008 * Unicode script "Sora Sompeng". 5009 * @since 1.8 5010 */ 5011 SORA_SOMPENG, 5012 5013 /** 5014 * Unicode script "Chakma". 5015 * @since 1.8 5016 */ 5017 CHAKMA, 5018 5019 /** 5020 * Unicode script "Sharada". 5021 * @since 1.8 5022 */ 5023 SHARADA, 5024 5025 /** 5026 * Unicode script "Takri". 5027 * @since 1.8 5028 */ 5029 TAKRI, 5030 5031 /** 5032 * Unicode script "Miao". 5033 * @since 1.8 5034 */ 5035 MIAO, 5036 5037 /** 5038 * Unicode script "Caucasian Albanian". 5039 * @since 9 5040 */ 5041 CAUCASIAN_ALBANIAN, 5042 5043 /** 5044 * Unicode script "Bassa Vah". 5045 * @since 9 5046 */ 5047 BASSA_VAH, 5048 5049 /** 5050 * Unicode script "Duployan". 5051 * @since 9 5052 */ 5053 DUPLOYAN, 5054 5055 /** 5056 * Unicode script "Elbasan". 5057 * @since 9 5058 */ 5059 ELBASAN, 5060 5061 /** 5062 * Unicode script "Grantha". 5063 * @since 9 5064 */ 5065 GRANTHA, 5066 5067 /** 5068 * Unicode script "Pahawh Hmong". 5069 * @since 9 5070 */ 5071 PAHAWH_HMONG, 5072 5073 /** 5074 * Unicode script "Khojki". 5075 * @since 9 5076 */ 5077 KHOJKI, 5078 5079 /** 5080 * Unicode script "Linear A". 5081 * @since 9 5082 */ 5083 LINEAR_A, 5084 5085 /** 5086 * Unicode script "Mahajani". 5087 * @since 9 5088 */ 5089 MAHAJANI, 5090 5091 /** 5092 * Unicode script "Manichaean". 5093 * @since 9 5094 */ 5095 MANICHAEAN, 5096 5097 /** 5098 * Unicode script "Mende Kikakui". 5099 * @since 9 5100 */ 5101 MENDE_KIKAKUI, 5102 5103 /** 5104 * Unicode script "Modi". 5105 * @since 9 5106 */ 5107 MODI, 5108 5109 /** 5110 * Unicode script "Mro". 
5111 * @since 9 5112 */ 5113 MRO, 5114 5115 /** 5116 * Unicode script "Old North Arabian". 5117 * @since 9 5118 */ 5119 OLD_NORTH_ARABIAN, 5120 5121 /** 5122 * Unicode script "Nabataean". 5123 * @since 9 5124 */ 5125 NABATAEAN, 5126 5127 /** 5128 * Unicode script "Palmyrene". 5129 * @since 9 5130 */ 5131 PALMYRENE, 5132 5133 /** 5134 * Unicode script "Pau Cin Hau". 5135 * @since 9 5136 */ 5137 PAU_CIN_HAU, 5138 5139 /** 5140 * Unicode script "Old Permic". 5141 * @since 9 5142 */ 5143 OLD_PERMIC, 5144 5145 /** 5146 * Unicode script "Psalter Pahlavi". 5147 * @since 9 5148 */ 5149 PSALTER_PAHLAVI, 5150 5151 /** 5152 * Unicode script "Siddham". 5153 * @since 9 5154 */ 5155 SIDDHAM, 5156 5157 /** 5158 * Unicode script "Khudawadi". 5159 * @since 9 5160 */ 5161 KHUDAWADI, 5162 5163 /** 5164 * Unicode script "Tirhuta". 5165 * @since 9 5166 */ 5167 TIRHUTA, 5168 5169 /** 5170 * Unicode script "Warang Citi". 5171 * @since 9 5172 */ 5173 WARANG_CITI, 5174 5175 /** 5176 * Unicode script "Ahom". 5177 * @since 9 5178 */ 5179 AHOM, 5180 5181 /** 5182 * Unicode script "Anatolian Hieroglyphs". 5183 * @since 9 5184 */ 5185 ANATOLIAN_HIEROGLYPHS, 5186 5187 /** 5188 * Unicode script "Hatran". 5189 * @since 9 5190 */ 5191 HATRAN, 5192 5193 /** 5194 * Unicode script "Multani". 5195 * @since 9 5196 */ 5197 MULTANI, 5198 5199 /** 5200 * Unicode script "Old Hungarian". 5201 * @since 9 5202 */ 5203 OLD_HUNGARIAN, 5204 5205 /** 5206 * Unicode script "SignWriting". 5207 * @since 9 5208 */ 5209 SIGNWRITING, 5210 5211 /** 5212 * Unicode script "Adlam". 5213 * @since 11 5214 */ 5215 ADLAM, 5216 5217 /** 5218 * Unicode script "Bhaiksuki". 5219 * @since 11 5220 */ 5221 BHAIKSUKI, 5222 5223 /** 5224 * Unicode script "Marchen". 5225 * @since 11 5226 */ 5227 MARCHEN, 5228 5229 /** 5230 * Unicode script "Newa". 5231 * @since 11 5232 */ 5233 NEWA, 5234 5235 /** 5236 * Unicode script "Osage". 5237 * @since 11 5238 */ 5239 OSAGE, 5240 5241 /** 5242 * Unicode script "Tangut". 
5243 * @since 11 5244 */ 5245 TANGUT, 5246 5247 /** 5248 * Unicode script "Masaram Gondi". 5249 * @since 11 5250 */ 5251 MASARAM_GONDI, 5252 5253 /** 5254 * Unicode script "Nushu". 5255 * @since 11 5256 */ 5257 NUSHU, 5258 5259 /** 5260 * Unicode script "Soyombo". 5261 * @since 11 5262 */ 5263 SOYOMBO, 5264 5265 /** 5266 * Unicode script "Zanabazar Square". 5267 * @since 11 5268 */ 5269 ZANABAZAR_SQUARE, 5270 5271 /** 5272 * Unicode script "Hanifi Rohingya". 5273 * @since 12 5274 */ 5275 HANIFI_ROHINGYA, 5276 5277 /** 5278 * Unicode script "Old Sogdian". 5279 * @since 12 5280 */ 5281 OLD_SOGDIAN, 5282 5283 /** 5284 * Unicode script "Sogdian". 5285 * @since 12 5286 */ 5287 SOGDIAN, 5288 5289 /** 5290 * Unicode script "Dogra". 5291 * @since 12 5292 */ 5293 DOGRA, 5294 5295 /** 5296 * Unicode script "Gunjala Gondi". 5297 * @since 12 5298 */ 5299 GUNJALA_GONDI, 5300 5301 /** 5302 * Unicode script "Makasar". 5303 * @since 12 5304 */ 5305 MAKASAR, 5306 5307 /** 5308 * Unicode script "Medefaidrin". 5309 * @since 12 5310 */ 5311 MEDEFAIDRIN, 5312 5313 /** 5314 * Unicode script "Elymaic". 5315 * @since 13 5316 */ 5317 ELYMAIC, 5318 5319 /** 5320 * Unicode script "Nandinagari". 5321 * @since 13 5322 */ 5323 NANDINAGARI, 5324 5325 /** 5326 * Unicode script "Nyiakeng Puachue Hmong". 5327 * @since 13 5328 */ 5329 NYIAKENG_PUACHUE_HMONG, 5330 5331 /** 5332 * Unicode script "Wancho". 5333 * @since 13 5334 */ 5335 WANCHO, 5336 5337 /** 5338 * Unicode script "Yezidi". 5339 * @since 15 5340 */ 5341 YEZIDI, 5342 5343 /** 5344 * Unicode script "Chorasmian". 5345 * @since 15 5346 */ 5347 CHORASMIAN, 5348 5349 /** 5350 * Unicode script "Dives Akuru". 5351 * @since 15 5352 */ 5353 DIVES_AKURU, 5354 5355 /** 5356 * Unicode script "Khitan Small Script". 5357 * @since 15 5358 */ 5359 KHITAN_SMALL_SCRIPT, 5360 5361 /** 5362 * Unicode script "Vithkuqi". 5363 * @since 19 5364 */ 5365 VITHKUQI, 5366 5367 /** 5368 * Unicode script "Old Uyghur". 
5369 * @since 19 5370 */ 5371 OLD_UYGHUR, 5372 5373 /** 5374 * Unicode script "Cypro Minoan". 5375 * @since 19 5376 */ 5377 CYPRO_MINOAN, 5378 5379 /** 5380 * Unicode script "Tangsa". 5381 * @since 19 5382 */ 5383 TANGSA, 5384 5385 /** 5386 * Unicode script "Toto". 5387 * @since 19 5388 */ 5389 TOTO, 5390 5391 /** 5392 * Unicode script "Kawi". 5393 * @since 20 5394 */ 5395 KAWI, 5396 5397 /** 5398 * Unicode script "Nag Mundari". 5399 * @since 20 5400 */ 5401 NAG_MUNDARI, 5402 5403 /** 5404 * Unicode script "Unknown". 5405 */ 5406 UNKNOWN; // must be the last enum constant for calculating the size of "aliases" hash map. 5407 5408 private static final int[] scriptStarts = { 5409 0x0000, // 0000..0040; COMMON 5410 0x0041, // 0041..005A; LATIN 5411 0x005B, // 005B..0060; COMMON 5412 0x0061, // 0061..007A; LATIN 5413 0x007B, // 007B..00A9; COMMON 5414 0x00AA, // 00AA ; LATIN 5415 0x00AB, // 00AB..00B9; COMMON 5416 0x00BA, // 00BA ; LATIN 5417 0x00BB, // 00BB..00BF; COMMON 5418 0x00C0, // 00C0..00D6; LATIN 5419 0x00D7, // 00D7 ; COMMON 5420 0x00D8, // 00D8..00F6; LATIN 5421 0x00F7, // 00F7 ; COMMON 5422 0x00F8, // 00F8..02B8; LATIN 5423 0x02B9, // 02B9..02DF; COMMON 5424 0x02E0, // 02E0..02E4; LATIN 5425 0x02E5, // 02E5..02E9; COMMON 5426 0x02EA, // 02EA..02EB; BOPOMOFO 5427 0x02EC, // 02EC..02FF; COMMON 5428 0x0300, // 0300..036F; INHERITED 5429 0x0370, // 0370..0373; GREEK 5430 0x0374, // 0374 ; COMMON 5431 0x0375, // 0375..0377; GREEK 5432 0x0378, // 0378..0379; UNKNOWN 5433 0x037A, // 037A..037D; GREEK 5434 0x037E, // 037E ; COMMON 5435 0x037F, // 037F ; GREEK 5436 0x0380, // 0380..0383; UNKNOWN 5437 0x0384, // 0384 ; GREEK 5438 0x0385, // 0385 ; COMMON 5439 0x0386, // 0386 ; GREEK 5440 0x0387, // 0387 ; COMMON 5441 0x0388, // 0388..038A; GREEK 5442 0x038B, // 038B ; UNKNOWN 5443 0x038C, // 038C ; GREEK 5444 0x038D, // 038D ; UNKNOWN 5445 0x038E, // 038E..03A1; GREEK 5446 0x03A2, // 03A2 ; UNKNOWN 5447 0x03A3, // 03A3..03E1; GREEK 5448 0x03E2, // 03E2..03EF; COPTIC 
5449 0x03F0, // 03F0..03FF; GREEK 5450 0x0400, // 0400..0484; CYRILLIC 5451 0x0485, // 0485..0486; INHERITED 5452 0x0487, // 0487..052F; CYRILLIC 5453 0x0530, // 0530 ; UNKNOWN 5454 0x0531, // 0531..0556; ARMENIAN 5455 0x0557, // 0557..0558; UNKNOWN 5456 0x0559, // 0559..058A; ARMENIAN 5457 0x058B, // 058B..058C; UNKNOWN 5458 0x058D, // 058D..058F; ARMENIAN 5459 0x0590, // 0590 ; UNKNOWN 5460 0x0591, // 0591..05C7; HEBREW 5461 0x05C8, // 05C8..05CF; UNKNOWN 5462 0x05D0, // 05D0..05EA; HEBREW 5463 0x05EB, // 05EB..05EE; UNKNOWN 5464 0x05EF, // 05EF..05F4; HEBREW 5465 0x05F5, // 05F5..05FF; UNKNOWN 5466 0x0600, // 0600..0604; ARABIC 5467 0x0605, // 0605 ; COMMON 5468 0x0606, // 0606..060B; ARABIC 5469 0x060C, // 060C ; COMMON 5470 0x060D, // 060D..061A; ARABIC 5471 0x061B, // 061B ; COMMON 5472 0x061C, // 061C..061E; ARABIC 5473 0x061F, // 061F ; COMMON 5474 0x0620, // 0620..063F; ARABIC 5475 0x0640, // 0640 ; COMMON 5476 0x0641, // 0641..064A; ARABIC 5477 0x064B, // 064B..0655; INHERITED 5478 0x0656, // 0656..066F; ARABIC 5479 0x0670, // 0670 ; INHERITED 5480 0x0671, // 0671..06DC; ARABIC 5481 0x06DD, // 06DD ; COMMON 5482 0x06DE, // 06DE..06FF; ARABIC 5483 0x0700, // 0700..070D; SYRIAC 5484 0x070E, // 070E ; UNKNOWN 5485 0x070F, // 070F..074A; SYRIAC 5486 0x074B, // 074B..074C; UNKNOWN 5487 0x074D, // 074D..074F; SYRIAC 5488 0x0750, // 0750..077F; ARABIC 5489 0x0780, // 0780..07B1; THAANA 5490 0x07B2, // 07B2..07BF; UNKNOWN 5491 0x07C0, // 07C0..07FA; NKO 5492 0x07FB, // 07FB..07FC; UNKNOWN 5493 0x07FD, // 07FD..07FF; NKO 5494 0x0800, // 0800..082D; SAMARITAN 5495 0x082E, // 082E..082F; UNKNOWN 5496 0x0830, // 0830..083E; SAMARITAN 5497 0x083F, // 083F ; UNKNOWN 5498 0x0840, // 0840..085B; MANDAIC 5499 0x085C, // 085C..085D; UNKNOWN 5500 0x085E, // 085E ; MANDAIC 5501 0x085F, // 085F ; UNKNOWN 5502 0x0860, // 0860..086A; SYRIAC 5503 0x086B, // 086B..086F; UNKNOWN 5504 0x0870, // 0870..088E; ARABIC 5505 0x088F, // 088F ; UNKNOWN 5506 0x0890, // 0890..0891; ARABIC 
5507 0x0892, // 0892..0897; UNKNOWN 5508 0x0898, // 0898..08E1; ARABIC 5509 0x08E2, // 08E2 ; COMMON 5510 0x08E3, // 08E3..08FF; ARABIC 5511 0x0900, // 0900..0950; DEVANAGARI 5512 0x0951, // 0951..0954; INHERITED 5513 0x0955, // 0955..0963; DEVANAGARI 5514 0x0964, // 0964..0965; COMMON 5515 0x0966, // 0966..097F; DEVANAGARI 5516 0x0980, // 0980..0983; BENGALI 5517 0x0984, // 0984 ; UNKNOWN 5518 0x0985, // 0985..098C; BENGALI 5519 0x098D, // 098D..098E; UNKNOWN 5520 0x098F, // 098F..0990; BENGALI 5521 0x0991, // 0991..0992; UNKNOWN 5522 0x0993, // 0993..09A8; BENGALI 5523 0x09A9, // 09A9 ; UNKNOWN 5524 0x09AA, // 09AA..09B0; BENGALI 5525 0x09B1, // 09B1 ; UNKNOWN 5526 0x09B2, // 09B2 ; BENGALI 5527 0x09B3, // 09B3..09B5; UNKNOWN 5528 0x09B6, // 09B6..09B9; BENGALI 5529 0x09BA, // 09BA..09BB; UNKNOWN 5530 0x09BC, // 09BC..09C4; BENGALI 5531 0x09C5, // 09C5..09C6; UNKNOWN 5532 0x09C7, // 09C7..09C8; BENGALI 5533 0x09C9, // 09C9..09CA; UNKNOWN 5534 0x09CB, // 09CB..09CE; BENGALI 5535 0x09CF, // 09CF..09D6; UNKNOWN 5536 0x09D7, // 09D7 ; BENGALI 5537 0x09D8, // 09D8..09DB; UNKNOWN 5538 0x09DC, // 09DC..09DD; BENGALI 5539 0x09DE, // 09DE ; UNKNOWN 5540 0x09DF, // 09DF..09E3; BENGALI 5541 0x09E4, // 09E4..09E5; UNKNOWN 5542 0x09E6, // 09E6..09FE; BENGALI 5543 0x09FF, // 09FF..0A00; UNKNOWN 5544 0x0A01, // 0A01..0A03; GURMUKHI 5545 0x0A04, // 0A04 ; UNKNOWN 5546 0x0A05, // 0A05..0A0A; GURMUKHI 5547 0x0A0B, // 0A0B..0A0E; UNKNOWN 5548 0x0A0F, // 0A0F..0A10; GURMUKHI 5549 0x0A11, // 0A11..0A12; UNKNOWN 5550 0x0A13, // 0A13..0A28; GURMUKHI 5551 0x0A29, // 0A29 ; UNKNOWN 5552 0x0A2A, // 0A2A..0A30; GURMUKHI 5553 0x0A31, // 0A31 ; UNKNOWN 5554 0x0A32, // 0A32..0A33; GURMUKHI 5555 0x0A34, // 0A34 ; UNKNOWN 5556 0x0A35, // 0A35..0A36; GURMUKHI 5557 0x0A37, // 0A37 ; UNKNOWN 5558 0x0A38, // 0A38..0A39; GURMUKHI 5559 0x0A3A, // 0A3A..0A3B; UNKNOWN 5560 0x0A3C, // 0A3C ; GURMUKHI 5561 0x0A3D, // 0A3D ; UNKNOWN 5562 0x0A3E, // 0A3E..0A42; GURMUKHI 5563 0x0A43, // 0A43..0A46; UNKNOWN 
5564 0x0A47, // 0A47..0A48; GURMUKHI 5565 0x0A49, // 0A49..0A4A; UNKNOWN 5566 0x0A4B, // 0A4B..0A4D; GURMUKHI 5567 0x0A4E, // 0A4E..0A50; UNKNOWN 5568 0x0A51, // 0A51 ; GURMUKHI 5569 0x0A52, // 0A52..0A58; UNKNOWN 5570 0x0A59, // 0A59..0A5C; GURMUKHI 5571 0x0A5D, // 0A5D ; UNKNOWN 5572 0x0A5E, // 0A5E ; GURMUKHI 5573 0x0A5F, // 0A5F..0A65; UNKNOWN 5574 0x0A66, // 0A66..0A76; GURMUKHI 5575 0x0A77, // 0A77..0A80; UNKNOWN 5576 0x0A81, // 0A81..0A83; GUJARATI 5577 0x0A84, // 0A84 ; UNKNOWN 5578 0x0A85, // 0A85..0A8D; GUJARATI 5579 0x0A8E, // 0A8E ; UNKNOWN 5580 0x0A8F, // 0A8F..0A91; GUJARATI 5581 0x0A92, // 0A92 ; UNKNOWN 5582 0x0A93, // 0A93..0AA8; GUJARATI 5583 0x0AA9, // 0AA9 ; UNKNOWN 5584 0x0AAA, // 0AAA..0AB0; GUJARATI 5585 0x0AB1, // 0AB1 ; UNKNOWN 5586 0x0AB2, // 0AB2..0AB3; GUJARATI 5587 0x0AB4, // 0AB4 ; UNKNOWN 5588 0x0AB5, // 0AB5..0AB9; GUJARATI 5589 0x0ABA, // 0ABA..0ABB; UNKNOWN 5590 0x0ABC, // 0ABC..0AC5; GUJARATI 5591 0x0AC6, // 0AC6 ; UNKNOWN 5592 0x0AC7, // 0AC7..0AC9; GUJARATI 5593 0x0ACA, // 0ACA ; UNKNOWN 5594 0x0ACB, // 0ACB..0ACD; GUJARATI 5595 0x0ACE, // 0ACE..0ACF; UNKNOWN 5596 0x0AD0, // 0AD0 ; GUJARATI 5597 0x0AD1, // 0AD1..0ADF; UNKNOWN 5598 0x0AE0, // 0AE0..0AE3; GUJARATI 5599 0x0AE4, // 0AE4..0AE5; UNKNOWN 5600 0x0AE6, // 0AE6..0AF1; GUJARATI 5601 0x0AF2, // 0AF2..0AF8; UNKNOWN 5602 0x0AF9, // 0AF9..0AFF; GUJARATI 5603 0x0B00, // 0B00 ; UNKNOWN 5604 0x0B01, // 0B01..0B03; ORIYA 5605 0x0B04, // 0B04 ; UNKNOWN 5606 0x0B05, // 0B05..0B0C; ORIYA 5607 0x0B0D, // 0B0D..0B0E; UNKNOWN 5608 0x0B0F, // 0B0F..0B10; ORIYA 5609 0x0B11, // 0B11..0B12; UNKNOWN 5610 0x0B13, // 0B13..0B28; ORIYA 5611 0x0B29, // 0B29 ; UNKNOWN 5612 0x0B2A, // 0B2A..0B30; ORIYA 5613 0x0B31, // 0B31 ; UNKNOWN 5614 0x0B32, // 0B32..0B33; ORIYA 5615 0x0B34, // 0B34 ; UNKNOWN 5616 0x0B35, // 0B35..0B39; ORIYA 5617 0x0B3A, // 0B3A..0B3B; UNKNOWN 5618 0x0B3C, // 0B3C..0B44; ORIYA 5619 0x0B45, // 0B45..0B46; UNKNOWN 5620 0x0B47, // 0B47..0B48; ORIYA 5621 0x0B49, // 0B49..0B4A; 
UNKNOWN 5622 0x0B4B, // 0B4B..0B4D; ORIYA 5623 0x0B4E, // 0B4E..0B54; UNKNOWN 5624 0x0B55, // 0B55..0B57; ORIYA 5625 0x0B58, // 0B58..0B5B; UNKNOWN 5626 0x0B5C, // 0B5C..0B5D; ORIYA 5627 0x0B5E, // 0B5E ; UNKNOWN 5628 0x0B5F, // 0B5F..0B63; ORIYA 5629 0x0B64, // 0B64..0B65; UNKNOWN 5630 0x0B66, // 0B66..0B77; ORIYA 5631 0x0B78, // 0B78..0B81; UNKNOWN 5632 0x0B82, // 0B82..0B83; TAMIL 5633 0x0B84, // 0B84 ; UNKNOWN 5634 0x0B85, // 0B85..0B8A; TAMIL 5635 0x0B8B, // 0B8B..0B8D; UNKNOWN 5636 0x0B8E, // 0B8E..0B90; TAMIL 5637 0x0B91, // 0B91 ; UNKNOWN 5638 0x0B92, // 0B92..0B95; TAMIL 5639 0x0B96, // 0B96..0B98; UNKNOWN 5640 0x0B99, // 0B99..0B9A; TAMIL 5641 0x0B9B, // 0B9B ; UNKNOWN 5642 0x0B9C, // 0B9C ; TAMIL 5643 0x0B9D, // 0B9D ; UNKNOWN 5644 0x0B9E, // 0B9E..0B9F; TAMIL 5645 0x0BA0, // 0BA0..0BA2; UNKNOWN 5646 0x0BA3, // 0BA3..0BA4; TAMIL 5647 0x0BA5, // 0BA5..0BA7; UNKNOWN 5648 0x0BA8, // 0BA8..0BAA; TAMIL 5649 0x0BAB, // 0BAB..0BAD; UNKNOWN 5650 0x0BAE, // 0BAE..0BB9; TAMIL 5651 0x0BBA, // 0BBA..0BBD; UNKNOWN 5652 0x0BBE, // 0BBE..0BC2; TAMIL 5653 0x0BC3, // 0BC3..0BC5; UNKNOWN 5654 0x0BC6, // 0BC6..0BC8; TAMIL 5655 0x0BC9, // 0BC9 ; UNKNOWN 5656 0x0BCA, // 0BCA..0BCD; TAMIL 5657 0x0BCE, // 0BCE..0BCF; UNKNOWN 5658 0x0BD0, // 0BD0 ; TAMIL 5659 0x0BD1, // 0BD1..0BD6; UNKNOWN 5660 0x0BD7, // 0BD7 ; TAMIL 5661 0x0BD8, // 0BD8..0BE5; UNKNOWN 5662 0x0BE6, // 0BE6..0BFA; TAMIL 5663 0x0BFB, // 0BFB..0BFF; UNKNOWN 5664 0x0C00, // 0C00..0C0C; TELUGU 5665 0x0C0D, // 0C0D ; UNKNOWN 5666 0x0C0E, // 0C0E..0C10; TELUGU 5667 0x0C11, // 0C11 ; UNKNOWN 5668 0x0C12, // 0C12..0C28; TELUGU 5669 0x0C29, // 0C29 ; UNKNOWN 5670 0x0C2A, // 0C2A..0C39; TELUGU 5671 0x0C3A, // 0C3A..0C3B; UNKNOWN 5672 0x0C3C, // 0C3C..0C44; TELUGU 5673 0x0C45, // 0C45 ; UNKNOWN 5674 0x0C46, // 0C46..0C48; TELUGU 5675 0x0C49, // 0C49 ; UNKNOWN 5676 0x0C4A, // 0C4A..0C4D; TELUGU 5677 0x0C4E, // 0C4E..0C54; UNKNOWN 5678 0x0C55, // 0C55..0C56; TELUGU 5679 0x0C57, // 0C57 ; UNKNOWN 5680 0x0C58, // 0C58..0C5A; 
TELUGU 5681 0x0C5B, // 0C5B..0C5C; UNKNOWN 5682 0x0C5D, // 0C5D ; TELUGU 5683 0x0C5E, // 0C5E..0C5F; UNKNOWN 5684 0x0C60, // 0C60..0C63; TELUGU 5685 0x0C64, // 0C64..0C65; UNKNOWN 5686 0x0C66, // 0C66..0C6F; TELUGU 5687 0x0C70, // 0C70..0C76; UNKNOWN 5688 0x0C77, // 0C77..0C7F; TELUGU 5689 0x0C80, // 0C80..0C8C; KANNADA 5690 0x0C8D, // 0C8D ; UNKNOWN 5691 0x0C8E, // 0C8E..0C90; KANNADA 5692 0x0C91, // 0C91 ; UNKNOWN 5693 0x0C92, // 0C92..0CA8; KANNADA 5694 0x0CA9, // 0CA9 ; UNKNOWN 5695 0x0CAA, // 0CAA..0CB3; KANNADA 5696 0x0CB4, // 0CB4 ; UNKNOWN 5697 0x0CB5, // 0CB5..0CB9; KANNADA 5698 0x0CBA, // 0CBA..0CBB; UNKNOWN 5699 0x0CBC, // 0CBC..0CC4; KANNADA 5700 0x0CC5, // 0CC5 ; UNKNOWN 5701 0x0CC6, // 0CC6..0CC8; KANNADA 5702 0x0CC9, // 0CC9 ; UNKNOWN 5703 0x0CCA, // 0CCA..0CCD; KANNADA 5704 0x0CCE, // 0CCE..0CD4; UNKNOWN 5705 0x0CD5, // 0CD5..0CD6; KANNADA 5706 0x0CD7, // 0CD7..0CDC; UNKNOWN 5707 0x0CDD, // 0CDD..0CDE; KANNADA 5708 0x0CDF, // 0CDF ; UNKNOWN 5709 0x0CE0, // 0CE0..0CE3; KANNADA 5710 0x0CE4, // 0CE4..0CE5; UNKNOWN 5711 0x0CE6, // 0CE6..0CEF; KANNADA 5712 0x0CF0, // 0CF0 ; UNKNOWN 5713 0x0CF1, // 0CF1..0CF3; KANNADA 5714 0x0CF4, // 0CF4..0CFF; UNKNOWN 5715 0x0D00, // 0D00..0D0C; MALAYALAM 5716 0x0D0D, // 0D0D ; UNKNOWN 5717 0x0D0E, // 0D0E..0D10; MALAYALAM 5718 0x0D11, // 0D11 ; UNKNOWN 5719 0x0D12, // 0D12..0D44; MALAYALAM 5720 0x0D45, // 0D45 ; UNKNOWN 5721 0x0D46, // 0D46..0D48; MALAYALAM 5722 0x0D49, // 0D49 ; UNKNOWN 5723 0x0D4A, // 0D4A..0D4F; MALAYALAM 5724 0x0D50, // 0D50..0D53; UNKNOWN 5725 0x0D54, // 0D54..0D63; MALAYALAM 5726 0x0D64, // 0D64..0D65; UNKNOWN 5727 0x0D66, // 0D66..0D7F; MALAYALAM 5728 0x0D80, // 0D80 ; UNKNOWN 5729 0x0D81, // 0D81..0D83; SINHALA 5730 0x0D84, // 0D84 ; UNKNOWN 5731 0x0D85, // 0D85..0D96; SINHALA 5732 0x0D97, // 0D97..0D99; UNKNOWN 5733 0x0D9A, // 0D9A..0DB1; SINHALA 5734 0x0DB2, // 0DB2 ; UNKNOWN 5735 0x0DB3, // 0DB3..0DBB; SINHALA 5736 0x0DBC, // 0DBC ; UNKNOWN 5737 0x0DBD, // 0DBD ; SINHALA 5738 0x0DBE, // 
0DBE..0DBF; UNKNOWN 5739 0x0DC0, // 0DC0..0DC6; SINHALA 5740 0x0DC7, // 0DC7..0DC9; UNKNOWN 5741 0x0DCA, // 0DCA ; SINHALA 5742 0x0DCB, // 0DCB..0DCE; UNKNOWN 5743 0x0DCF, // 0DCF..0DD4; SINHALA 5744 0x0DD5, // 0DD5 ; UNKNOWN 5745 0x0DD6, // 0DD6 ; SINHALA 5746 0x0DD7, // 0DD7 ; UNKNOWN 5747 0x0DD8, // 0DD8..0DDF; SINHALA 5748 0x0DE0, // 0DE0..0DE5; UNKNOWN 5749 0x0DE6, // 0DE6..0DEF; SINHALA 5750 0x0DF0, // 0DF0..0DF1; UNKNOWN 5751 0x0DF2, // 0DF2..0DF4; SINHALA 5752 0x0DF5, // 0DF5..0E00; UNKNOWN 5753 0x0E01, // 0E01..0E3A; THAI 5754 0x0E3B, // 0E3B..0E3E; UNKNOWN 5755 0x0E3F, // 0E3F ; COMMON 5756 0x0E40, // 0E40..0E5B; THAI 5757 0x0E5C, // 0E5C..0E80; UNKNOWN 5758 0x0E81, // 0E81..0E82; LAO 5759 0x0E83, // 0E83 ; UNKNOWN 5760 0x0E84, // 0E84 ; LAO 5761 0x0E85, // 0E85 ; UNKNOWN 5762 0x0E86, // 0E86..0E8A; LAO 5763 0x0E8B, // 0E8B ; UNKNOWN 5764 0x0E8C, // 0E8C..0EA3; LAO 5765 0x0EA4, // 0EA4 ; UNKNOWN 5766 0x0EA5, // 0EA5 ; LAO 5767 0x0EA6, // 0EA6 ; UNKNOWN 5768 0x0EA7, // 0EA7..0EBD; LAO 5769 0x0EBE, // 0EBE..0EBF; UNKNOWN 5770 0x0EC0, // 0EC0..0EC4; LAO 5771 0x0EC5, // 0EC5 ; UNKNOWN 5772 0x0EC6, // 0EC6 ; LAO 5773 0x0EC7, // 0EC7 ; UNKNOWN 5774 0x0EC8, // 0EC8..0ECE; LAO 5775 0x0ECF, // 0ECF ; UNKNOWN 5776 0x0ED0, // 0ED0..0ED9; LAO 5777 0x0EDA, // 0EDA..0EDB; UNKNOWN 5778 0x0EDC, // 0EDC..0EDF; LAO 5779 0x0EE0, // 0EE0..0EFF; UNKNOWN 5780 0x0F00, // 0F00..0F47; TIBETAN 5781 0x0F48, // 0F48 ; UNKNOWN 5782 0x0F49, // 0F49..0F6C; TIBETAN 5783 0x0F6D, // 0F6D..0F70; UNKNOWN 5784 0x0F71, // 0F71..0F97; TIBETAN 5785 0x0F98, // 0F98 ; UNKNOWN 5786 0x0F99, // 0F99..0FBC; TIBETAN 5787 0x0FBD, // 0FBD ; UNKNOWN 5788 0x0FBE, // 0FBE..0FCC; TIBETAN 5789 0x0FCD, // 0FCD ; UNKNOWN 5790 0x0FCE, // 0FCE..0FD4; TIBETAN 5791 0x0FD5, // 0FD5..0FD8; COMMON 5792 0x0FD9, // 0FD9..0FDA; TIBETAN 5793 0x0FDB, // 0FDB..0FFF; UNKNOWN 5794 0x1000, // 1000..109F; MYANMAR 5795 0x10A0, // 10A0..10C5; GEORGIAN 5796 0x10C6, // 10C6 ; UNKNOWN 5797 0x10C7, // 10C7 ; GEORGIAN 5798 0x10C8, // 
10C8..10CC; UNKNOWN 5799 0x10CD, // 10CD ; GEORGIAN 5800 0x10CE, // 10CE..10CF; UNKNOWN 5801 0x10D0, // 10D0..10FA; GEORGIAN 5802 0x10FB, // 10FB ; COMMON 5803 0x10FC, // 10FC..10FF; GEORGIAN 5804 0x1100, // 1100..11FF; HANGUL 5805 0x1200, // 1200..1248; ETHIOPIC 5806 0x1249, // 1249 ; UNKNOWN 5807 0x124A, // 124A..124D; ETHIOPIC 5808 0x124E, // 124E..124F; UNKNOWN 5809 0x1250, // 1250..1256; ETHIOPIC 5810 0x1257, // 1257 ; UNKNOWN 5811 0x1258, // 1258 ; ETHIOPIC 5812 0x1259, // 1259 ; UNKNOWN 5813 0x125A, // 125A..125D; ETHIOPIC 5814 0x125E, // 125E..125F; UNKNOWN 5815 0x1260, // 1260..1288; ETHIOPIC 5816 0x1289, // 1289 ; UNKNOWN 5817 0x128A, // 128A..128D; ETHIOPIC 5818 0x128E, // 128E..128F; UNKNOWN 5819 0x1290, // 1290..12B0; ETHIOPIC 5820 0x12B1, // 12B1 ; UNKNOWN 5821 0x12B2, // 12B2..12B5; ETHIOPIC 5822 0x12B6, // 12B6..12B7; UNKNOWN 5823 0x12B8, // 12B8..12BE; ETHIOPIC 5824 0x12BF, // 12BF ; UNKNOWN 5825 0x12C0, // 12C0 ; ETHIOPIC 5826 0x12C1, // 12C1 ; UNKNOWN 5827 0x12C2, // 12C2..12C5; ETHIOPIC 5828 0x12C6, // 12C6..12C7; UNKNOWN 5829 0x12C8, // 12C8..12D6; ETHIOPIC 5830 0x12D7, // 12D7 ; UNKNOWN 5831 0x12D8, // 12D8..1310; ETHIOPIC 5832 0x1311, // 1311 ; UNKNOWN 5833 0x1312, // 1312..1315; ETHIOPIC 5834 0x1316, // 1316..1317; UNKNOWN 5835 0x1318, // 1318..135A; ETHIOPIC 5836 0x135B, // 135B..135C; UNKNOWN 5837 0x135D, // 135D..137C; ETHIOPIC 5838 0x137D, // 137D..137F; UNKNOWN 5839 0x1380, // 1380..1399; ETHIOPIC 5840 0x139A, // 139A..139F; UNKNOWN 5841 0x13A0, // 13A0..13F5; CHEROKEE 5842 0x13F6, // 13F6..13F7; UNKNOWN 5843 0x13F8, // 13F8..13FD; CHEROKEE 5844 0x13FE, // 13FE..13FF; UNKNOWN 5845 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 5846 0x1680, // 1680..169C; OGHAM 5847 0x169D, // 169D..169F; UNKNOWN 5848 0x16A0, // 16A0..16EA; RUNIC 5849 0x16EB, // 16EB..16ED; COMMON 5850 0x16EE, // 16EE..16F8; RUNIC 5851 0x16F9, // 16F9..16FF; UNKNOWN 5852 0x1700, // 1700..1715; TAGALOG 5853 0x1716, // 1716..171E; UNKNOWN 5854 0x171F, // 171F ; TAGALOG 5855 
0x1720, // 1720..1734; HANUNOO 5856 0x1735, // 1735..1736; COMMON 5857 0x1737, // 1737..173F; UNKNOWN 5858 0x1740, // 1740..1753; BUHID 5859 0x1754, // 1754..175F; UNKNOWN 5860 0x1760, // 1760..176C; TAGBANWA 5861 0x176D, // 176D ; UNKNOWN 5862 0x176E, // 176E..1770; TAGBANWA 5863 0x1771, // 1771 ; UNKNOWN 5864 0x1772, // 1772..1773; TAGBANWA 5865 0x1774, // 1774..177F; UNKNOWN 5866 0x1780, // 1780..17DD; KHMER 5867 0x17DE, // 17DE..17DF; UNKNOWN 5868 0x17E0, // 17E0..17E9; KHMER 5869 0x17EA, // 17EA..17EF; UNKNOWN 5870 0x17F0, // 17F0..17F9; KHMER 5871 0x17FA, // 17FA..17FF; UNKNOWN 5872 0x1800, // 1800..1801; MONGOLIAN 5873 0x1802, // 1802..1803; COMMON 5874 0x1804, // 1804 ; MONGOLIAN 5875 0x1805, // 1805 ; COMMON 5876 0x1806, // 1806..1819; MONGOLIAN 5877 0x181A, // 181A..181F; UNKNOWN 5878 0x1820, // 1820..1878; MONGOLIAN 5879 0x1879, // 1879..187F; UNKNOWN 5880 0x1880, // 1880..18AA; MONGOLIAN 5881 0x18AB, // 18AB..18AF; UNKNOWN 5882 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL 5883 0x18F6, // 18F6..18FF; UNKNOWN 5884 0x1900, // 1900..191E; LIMBU 5885 0x191F, // 191F ; UNKNOWN 5886 0x1920, // 1920..192B; LIMBU 5887 0x192C, // 192C..192F; UNKNOWN 5888 0x1930, // 1930..193B; LIMBU 5889 0x193C, // 193C..193F; UNKNOWN 5890 0x1940, // 1940 ; LIMBU 5891 0x1941, // 1941..1943; UNKNOWN 5892 0x1944, // 1944..194F; LIMBU 5893 0x1950, // 1950..196D; TAI_LE 5894 0x196E, // 196E..196F; UNKNOWN 5895 0x1970, // 1970..1974; TAI_LE 5896 0x1975, // 1975..197F; UNKNOWN 5897 0x1980, // 1980..19AB; NEW_TAI_LUE 5898 0x19AC, // 19AC..19AF; UNKNOWN 5899 0x19B0, // 19B0..19C9; NEW_TAI_LUE 5900 0x19CA, // 19CA..19CF; UNKNOWN 5901 0x19D0, // 19D0..19DA; NEW_TAI_LUE 5902 0x19DB, // 19DB..19DD; UNKNOWN 5903 0x19DE, // 19DE..19DF; NEW_TAI_LUE 5904 0x19E0, // 19E0..19FF; KHMER 5905 0x1A00, // 1A00..1A1B; BUGINESE 5906 0x1A1C, // 1A1C..1A1D; UNKNOWN 5907 0x1A1E, // 1A1E..1A1F; BUGINESE 5908 0x1A20, // 1A20..1A5E; TAI_THAM 5909 0x1A5F, // 1A5F ; UNKNOWN 5910 0x1A60, // 1A60..1A7C; TAI_THAM 
5911 0x1A7D, // 1A7D..1A7E; UNKNOWN 5912 0x1A7F, // 1A7F..1A89; TAI_THAM 5913 0x1A8A, // 1A8A..1A8F; UNKNOWN 5914 0x1A90, // 1A90..1A99; TAI_THAM 5915 0x1A9A, // 1A9A..1A9F; UNKNOWN 5916 0x1AA0, // 1AA0..1AAD; TAI_THAM 5917 0x1AAE, // 1AAE..1AAF; UNKNOWN 5918 0x1AB0, // 1AB0..1ACE; INHERITED 5919 0x1ACF, // 1ACF..1AFF; UNKNOWN 5920 0x1B00, // 1B00..1B4C; BALINESE 5921 0x1B4D, // 1B4D..1B4F; UNKNOWN 5922 0x1B50, // 1B50..1B7E; BALINESE 5923 0x1B7F, // 1B7F ; UNKNOWN 5924 0x1B80, // 1B80..1BBF; SUNDANESE 5925 0x1BC0, // 1BC0..1BF3; BATAK 5926 0x1BF4, // 1BF4..1BFB; UNKNOWN 5927 0x1BFC, // 1BFC..1BFF; BATAK 5928 0x1C00, // 1C00..1C37; LEPCHA 5929 0x1C38, // 1C38..1C3A; UNKNOWN 5930 0x1C3B, // 1C3B..1C49; LEPCHA 5931 0x1C4A, // 1C4A..1C4C; UNKNOWN 5932 0x1C4D, // 1C4D..1C4F; LEPCHA 5933 0x1C50, // 1C50..1C7F; OL_CHIKI 5934 0x1C80, // 1C80..1C88; CYRILLIC 5935 0x1C89, // 1C89..1C8F; UNKNOWN 5936 0x1C90, // 1C90..1CBA; GEORGIAN 5937 0x1CBB, // 1CBB..1CBC; UNKNOWN 5938 0x1CBD, // 1CBD..1CBF; GEORGIAN 5939 0x1CC0, // 1CC0..1CC7; SUNDANESE 5940 0x1CC8, // 1CC8..1CCF; UNKNOWN 5941 0x1CD0, // 1CD0..1CD2; INHERITED 5942 0x1CD3, // 1CD3 ; COMMON 5943 0x1CD4, // 1CD4..1CE0; INHERITED 5944 0x1CE1, // 1CE1 ; COMMON 5945 0x1CE2, // 1CE2..1CE8; INHERITED 5946 0x1CE9, // 1CE9..1CEC; COMMON 5947 0x1CED, // 1CED ; INHERITED 5948 0x1CEE, // 1CEE..1CF3; COMMON 5949 0x1CF4, // 1CF4 ; INHERITED 5950 0x1CF5, // 1CF5..1CF7; COMMON 5951 0x1CF8, // 1CF8..1CF9; INHERITED 5952 0x1CFA, // 1CFA ; COMMON 5953 0x1CFB, // 1CFB..1CFF; UNKNOWN 5954 0x1D00, // 1D00..1D25; LATIN 5955 0x1D26, // 1D26..1D2A; GREEK 5956 0x1D2B, // 1D2B ; CYRILLIC 5957 0x1D2C, // 1D2C..1D5C; LATIN 5958 0x1D5D, // 1D5D..1D61; GREEK 5959 0x1D62, // 1D62..1D65; LATIN 5960 0x1D66, // 1D66..1D6A; GREEK 5961 0x1D6B, // 1D6B..1D77; LATIN 5962 0x1D78, // 1D78 ; CYRILLIC 5963 0x1D79, // 1D79..1DBE; LATIN 5964 0x1DBF, // 1DBF ; GREEK 5965 0x1DC0, // 1DC0..1DFF; INHERITED 5966 0x1E00, // 1E00..1EFF; LATIN 5967 0x1F00, // 1F00..1F15; 
GREEK 5968 0x1F16, // 1F16..1F17; UNKNOWN 5969 0x1F18, // 1F18..1F1D; GREEK 5970 0x1F1E, // 1F1E..1F1F; UNKNOWN 5971 0x1F20, // 1F20..1F45; GREEK 5972 0x1F46, // 1F46..1F47; UNKNOWN 5973 0x1F48, // 1F48..1F4D; GREEK 5974 0x1F4E, // 1F4E..1F4F; UNKNOWN 5975 0x1F50, // 1F50..1F57; GREEK 5976 0x1F58, // 1F58 ; UNKNOWN 5977 0x1F59, // 1F59 ; GREEK 5978 0x1F5A, // 1F5A ; UNKNOWN 5979 0x1F5B, // 1F5B ; GREEK 5980 0x1F5C, // 1F5C ; UNKNOWN 5981 0x1F5D, // 1F5D ; GREEK 5982 0x1F5E, // 1F5E ; UNKNOWN 5983 0x1F5F, // 1F5F..1F7D; GREEK 5984 0x1F7E, // 1F7E..1F7F; UNKNOWN 5985 0x1F80, // 1F80..1FB4; GREEK 5986 0x1FB5, // 1FB5 ; UNKNOWN 5987 0x1FB6, // 1FB6..1FC4; GREEK 5988 0x1FC5, // 1FC5 ; UNKNOWN 5989 0x1FC6, // 1FC6..1FD3; GREEK 5990 0x1FD4, // 1FD4..1FD5; UNKNOWN 5991 0x1FD6, // 1FD6..1FDB; GREEK 5992 0x1FDC, // 1FDC ; UNKNOWN 5993 0x1FDD, // 1FDD..1FEF; GREEK 5994 0x1FF0, // 1FF0..1FF1; UNKNOWN 5995 0x1FF2, // 1FF2..1FF4; GREEK 5996 0x1FF5, // 1FF5 ; UNKNOWN 5997 0x1FF6, // 1FF6..1FFE; GREEK 5998 0x1FFF, // 1FFF ; UNKNOWN 5999 0x2000, // 2000..200B; COMMON 6000 0x200C, // 200C..200D; INHERITED 6001 0x200E, // 200E..2064; COMMON 6002 0x2065, // 2065 ; UNKNOWN 6003 0x2066, // 2066..2070; COMMON 6004 0x2071, // 2071 ; LATIN 6005 0x2072, // 2072..2073; UNKNOWN 6006 0x2074, // 2074..207E; COMMON 6007 0x207F, // 207F ; LATIN 6008 0x2080, // 2080..208E; COMMON 6009 0x208F, // 208F ; UNKNOWN 6010 0x2090, // 2090..209C; LATIN 6011 0x209D, // 209D..209F; UNKNOWN 6012 0x20A0, // 20A0..20C0; COMMON 6013 0x20C1, // 20C1..20CF; UNKNOWN 6014 0x20D0, // 20D0..20F0; INHERITED 6015 0x20F1, // 20F1..20FF; UNKNOWN 6016 0x2100, // 2100..2125; COMMON 6017 0x2126, // 2126 ; GREEK 6018 0x2127, // 2127..2129; COMMON 6019 0x212A, // 212A..212B; LATIN 6020 0x212C, // 212C..2131; COMMON 6021 0x2132, // 2132 ; LATIN 6022 0x2133, // 2133..214D; COMMON 6023 0x214E, // 214E ; LATIN 6024 0x214F, // 214F..215F; COMMON 6025 0x2160, // 2160..2188; LATIN 6026 0x2189, // 2189..218B; COMMON 6027 0x218C, // 
218C..218F; UNKNOWN 6028 0x2190, // 2190..2426; COMMON 6029 0x2427, // 2427..243F; UNKNOWN 6030 0x2440, // 2440..244A; COMMON 6031 0x244B, // 244B..245F; UNKNOWN 6032 0x2460, // 2460..27FF; COMMON 6033 0x2800, // 2800..28FF; BRAILLE 6034 0x2900, // 2900..2B73; COMMON 6035 0x2B74, // 2B74..2B75; UNKNOWN 6036 0x2B76, // 2B76..2B95; COMMON 6037 0x2B96, // 2B96 ; UNKNOWN 6038 0x2B97, // 2B97..2BFF; COMMON 6039 0x2C00, // 2C00..2C5F; GLAGOLITIC 6040 0x2C60, // 2C60..2C7F; LATIN 6041 0x2C80, // 2C80..2CF3; COPTIC 6042 0x2CF4, // 2CF4..2CF8; UNKNOWN 6043 0x2CF9, // 2CF9..2CFF; COPTIC 6044 0x2D00, // 2D00..2D25; GEORGIAN 6045 0x2D26, // 2D26 ; UNKNOWN 6046 0x2D27, // 2D27 ; GEORGIAN 6047 0x2D28, // 2D28..2D2C; UNKNOWN 6048 0x2D2D, // 2D2D ; GEORGIAN 6049 0x2D2E, // 2D2E..2D2F; UNKNOWN 6050 0x2D30, // 2D30..2D67; TIFINAGH 6051 0x2D68, // 2D68..2D6E; UNKNOWN 6052 0x2D6F, // 2D6F..2D70; TIFINAGH 6053 0x2D71, // 2D71..2D7E; UNKNOWN 6054 0x2D7F, // 2D7F ; TIFINAGH 6055 0x2D80, // 2D80..2D96; ETHIOPIC 6056 0x2D97, // 2D97..2D9F; UNKNOWN 6057 0x2DA0, // 2DA0..2DA6; ETHIOPIC 6058 0x2DA7, // 2DA7 ; UNKNOWN 6059 0x2DA8, // 2DA8..2DAE; ETHIOPIC 6060 0x2DAF, // 2DAF ; UNKNOWN 6061 0x2DB0, // 2DB0..2DB6; ETHIOPIC 6062 0x2DB7, // 2DB7 ; UNKNOWN 6063 0x2DB8, // 2DB8..2DBE; ETHIOPIC 6064 0x2DBF, // 2DBF ; UNKNOWN 6065 0x2DC0, // 2DC0..2DC6; ETHIOPIC 6066 0x2DC7, // 2DC7 ; UNKNOWN 6067 0x2DC8, // 2DC8..2DCE; ETHIOPIC 6068 0x2DCF, // 2DCF ; UNKNOWN 6069 0x2DD0, // 2DD0..2DD6; ETHIOPIC 6070 0x2DD7, // 2DD7 ; UNKNOWN 6071 0x2DD8, // 2DD8..2DDE; ETHIOPIC 6072 0x2DDF, // 2DDF ; UNKNOWN 6073 0x2DE0, // 2DE0..2DFF; CYRILLIC 6074 0x2E00, // 2E00..2E5D; COMMON 6075 0x2E5E, // 2E5E..2E7F; UNKNOWN 6076 0x2E80, // 2E80..2E99; HAN 6077 0x2E9A, // 2E9A ; UNKNOWN 6078 0x2E9B, // 2E9B..2EF3; HAN 6079 0x2EF4, // 2EF4..2EFF; UNKNOWN 6080 0x2F00, // 2F00..2FD5; HAN 6081 0x2FD6, // 2FD6..2FEF; UNKNOWN 6082 0x2FF0, // 2FF0..3004; COMMON 6083 0x3005, // 3005 ; HAN 6084 0x3006, // 3006 ; COMMON 6085 0x3007, // 
3007 ; HAN 6086 0x3008, // 3008..3020; COMMON 6087 0x3021, // 3021..3029; HAN 6088 0x302A, // 302A..302D; INHERITED 6089 0x302E, // 302E..302F; HANGUL 6090 0x3030, // 3030..3037; COMMON 6091 0x3038, // 3038..303B; HAN 6092 0x303C, // 303C..303F; COMMON 6093 0x3040, // 3040 ; UNKNOWN 6094 0x3041, // 3041..3096; HIRAGANA 6095 0x3097, // 3097..3098; UNKNOWN 6096 0x3099, // 3099..309A; INHERITED 6097 0x309B, // 309B..309C; COMMON 6098 0x309D, // 309D..309F; HIRAGANA 6099 0x30A0, // 30A0 ; COMMON 6100 0x30A1, // 30A1..30FA; KATAKANA 6101 0x30FB, // 30FB..30FC; COMMON 6102 0x30FD, // 30FD..30FF; KATAKANA 6103 0x3100, // 3100..3104; UNKNOWN 6104 0x3105, // 3105..312F; BOPOMOFO 6105 0x3130, // 3130 ; UNKNOWN 6106 0x3131, // 3131..318E; HANGUL 6107 0x318F, // 318F ; UNKNOWN 6108 0x3190, // 3190..319F; COMMON 6109 0x31A0, // 31A0..31BF; BOPOMOFO 6110 0x31C0, // 31C0..31E3; COMMON 6111 0x31E4, // 31E4..31EE; UNKNOWN 6112 0x31EF, // 31EF ; COMMON 6113 0x31F0, // 31F0..31FF; KATAKANA 6114 0x3200, // 3200..321E; HANGUL 6115 0x321F, // 321F ; UNKNOWN 6116 0x3220, // 3220..325F; COMMON 6117 0x3260, // 3260..327E; HANGUL 6118 0x327F, // 327F..32CF; COMMON 6119 0x32D0, // 32D0..32FE; KATAKANA 6120 0x32FF, // 32FF ; COMMON 6121 0x3300, // 3300..3357; KATAKANA 6122 0x3358, // 3358..33FF; COMMON 6123 0x3400, // 3400..4DBF; HAN 6124 0x4DC0, // 4DC0..4DFF; COMMON 6125 0x4E00, // 4E00..9FFF; HAN 6126 0xA000, // A000..A48C; YI 6127 0xA48D, // A48D..A48F; UNKNOWN 6128 0xA490, // A490..A4C6; YI 6129 0xA4C7, // A4C7..A4CF; UNKNOWN 6130 0xA4D0, // A4D0..A4FF; LISU 6131 0xA500, // A500..A62B; VAI 6132 0xA62C, // A62C..A63F; UNKNOWN 6133 0xA640, // A640..A69F; CYRILLIC 6134 0xA6A0, // A6A0..A6F7; BAMUM 6135 0xA6F8, // A6F8..A6FF; UNKNOWN 6136 0xA700, // A700..A721; COMMON 6137 0xA722, // A722..A787; LATIN 6138 0xA788, // A788..A78A; COMMON 6139 0xA78B, // A78B..A7CA; LATIN 6140 0xA7CB, // A7CB..A7CF; UNKNOWN 6141 0xA7D0, // A7D0..A7D1; LATIN 6142 0xA7D2, // A7D2 ; UNKNOWN 6143 0xA7D3, // A7D3 ; 
LATIN 6144 0xA7D4, // A7D4 ; UNKNOWN 6145 0xA7D5, // A7D5..A7D9; LATIN 6146 0xA7DA, // A7DA..A7F1; UNKNOWN 6147 0xA7F2, // A7F2..A7FF; LATIN 6148 0xA800, // A800..A82C; SYLOTI_NAGRI 6149 0xA82D, // A82D..A82F; UNKNOWN 6150 0xA830, // A830..A839; COMMON 6151 0xA83A, // A83A..A83F; UNKNOWN 6152 0xA840, // A840..A877; PHAGS_PA 6153 0xA878, // A878..A87F; UNKNOWN 6154 0xA880, // A880..A8C5; SAURASHTRA 6155 0xA8C6, // A8C6..A8CD; UNKNOWN 6156 0xA8CE, // A8CE..A8D9; SAURASHTRA 6157 0xA8DA, // A8DA..A8DF; UNKNOWN 6158 0xA8E0, // A8E0..A8FF; DEVANAGARI 6159 0xA900, // A900..A92D; KAYAH_LI 6160 0xA92E, // A92E ; COMMON 6161 0xA92F, // A92F ; KAYAH_LI 6162 0xA930, // A930..A953; REJANG 6163 0xA954, // A954..A95E; UNKNOWN 6164 0xA95F, // A95F ; REJANG 6165 0xA960, // A960..A97C; HANGUL 6166 0xA97D, // A97D..A97F; UNKNOWN 6167 0xA980, // A980..A9CD; JAVANESE 6168 0xA9CE, // A9CE ; UNKNOWN 6169 0xA9CF, // A9CF ; COMMON 6170 0xA9D0, // A9D0..A9D9; JAVANESE 6171 0xA9DA, // A9DA..A9DD; UNKNOWN 6172 0xA9DE, // A9DE..A9DF; JAVANESE 6173 0xA9E0, // A9E0..A9FE; MYANMAR 6174 0xA9FF, // A9FF ; UNKNOWN 6175 0xAA00, // AA00..AA36; CHAM 6176 0xAA37, // AA37..AA3F; UNKNOWN 6177 0xAA40, // AA40..AA4D; CHAM 6178 0xAA4E, // AA4E..AA4F; UNKNOWN 6179 0xAA50, // AA50..AA59; CHAM 6180 0xAA5A, // AA5A..AA5B; UNKNOWN 6181 0xAA5C, // AA5C..AA5F; CHAM 6182 0xAA60, // AA60..AA7F; MYANMAR 6183 0xAA80, // AA80..AAC2; TAI_VIET 6184 0xAAC3, // AAC3..AADA; UNKNOWN 6185 0xAADB, // AADB..AADF; TAI_VIET 6186 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK 6187 0xAAF7, // AAF7..AB00; UNKNOWN 6188 0xAB01, // AB01..AB06; ETHIOPIC 6189 0xAB07, // AB07..AB08; UNKNOWN 6190 0xAB09, // AB09..AB0E; ETHIOPIC 6191 0xAB0F, // AB0F..AB10; UNKNOWN 6192 0xAB11, // AB11..AB16; ETHIOPIC 6193 0xAB17, // AB17..AB1F; UNKNOWN 6194 0xAB20, // AB20..AB26; ETHIOPIC 6195 0xAB27, // AB27 ; UNKNOWN 6196 0xAB28, // AB28..AB2E; ETHIOPIC 6197 0xAB2F, // AB2F ; UNKNOWN 6198 0xAB30, // AB30..AB5A; LATIN 6199 0xAB5B, // AB5B ; COMMON 6200 0xAB5C, // 
AB5C..AB64; LATIN 6201 0xAB65, // AB65 ; GREEK 6202 0xAB66, // AB66..AB69; LATIN 6203 0xAB6A, // AB6A..AB6B; COMMON 6204 0xAB6C, // AB6C..AB6F; UNKNOWN 6205 0xAB70, // AB70..ABBF; CHEROKEE 6206 0xABC0, // ABC0..ABED; MEETEI_MAYEK 6207 0xABEE, // ABEE..ABEF; UNKNOWN 6208 0xABF0, // ABF0..ABF9; MEETEI_MAYEK 6209 0xABFA, // ABFA..ABFF; UNKNOWN 6210 0xAC00, // AC00..D7A3; HANGUL 6211 0xD7A4, // D7A4..D7AF; UNKNOWN 6212 0xD7B0, // D7B0..D7C6; HANGUL 6213 0xD7C7, // D7C7..D7CA; UNKNOWN 6214 0xD7CB, // D7CB..D7FB; HANGUL 6215 0xD7FC, // D7FC..F8FF; UNKNOWN 6216 0xF900, // F900..FA6D; HAN 6217 0xFA6E, // FA6E..FA6F; UNKNOWN 6218 0xFA70, // FA70..FAD9; HAN 6219 0xFADA, // FADA..FAFF; UNKNOWN 6220 0xFB00, // FB00..FB06; LATIN 6221 0xFB07, // FB07..FB12; UNKNOWN 6222 0xFB13, // FB13..FB17; ARMENIAN 6223 0xFB18, // FB18..FB1C; UNKNOWN 6224 0xFB1D, // FB1D..FB36; HEBREW 6225 0xFB37, // FB37 ; UNKNOWN 6226 0xFB38, // FB38..FB3C; HEBREW 6227 0xFB3D, // FB3D ; UNKNOWN 6228 0xFB3E, // FB3E ; HEBREW 6229 0xFB3F, // FB3F ; UNKNOWN 6230 0xFB40, // FB40..FB41; HEBREW 6231 0xFB42, // FB42 ; UNKNOWN 6232 0xFB43, // FB43..FB44; HEBREW 6233 0xFB45, // FB45 ; UNKNOWN 6234 0xFB46, // FB46..FB4F; HEBREW 6235 0xFB50, // FB50..FBC2; ARABIC 6236 0xFBC3, // FBC3..FBD2; UNKNOWN 6237 0xFBD3, // FBD3..FD3D; ARABIC 6238 0xFD3E, // FD3E..FD3F; COMMON 6239 0xFD40, // FD40..FD8F; ARABIC 6240 0xFD90, // FD90..FD91; UNKNOWN 6241 0xFD92, // FD92..FDC7; ARABIC 6242 0xFDC8, // FDC8..FDCE; UNKNOWN 6243 0xFDCF, // FDCF ; ARABIC 6244 0xFDD0, // FDD0..FDEF; UNKNOWN 6245 0xFDF0, // FDF0..FDFF; ARABIC 6246 0xFE00, // FE00..FE0F; INHERITED 6247 0xFE10, // FE10..FE19; COMMON 6248 0xFE1A, // FE1A..FE1F; UNKNOWN 6249 0xFE20, // FE20..FE2D; INHERITED 6250 0xFE2E, // FE2E..FE2F; CYRILLIC 6251 0xFE30, // FE30..FE52; COMMON 6252 0xFE53, // FE53 ; UNKNOWN 6253 0xFE54, // FE54..FE66; COMMON 6254 0xFE67, // FE67 ; UNKNOWN 6255 0xFE68, // FE68..FE6B; COMMON 6256 0xFE6C, // FE6C..FE6F; UNKNOWN 6257 0xFE70, // FE70..FE74; 
ARABIC 6258 0xFE75, // FE75 ; UNKNOWN 6259 0xFE76, // FE76..FEFC; ARABIC 6260 0xFEFD, // FEFD..FEFE; UNKNOWN 6261 0xFEFF, // FEFF ; COMMON 6262 0xFF00, // FF00 ; UNKNOWN 6263 0xFF01, // FF01..FF20; COMMON 6264 0xFF21, // FF21..FF3A; LATIN 6265 0xFF3B, // FF3B..FF40; COMMON 6266 0xFF41, // FF41..FF5A; LATIN 6267 0xFF5B, // FF5B..FF65; COMMON 6268 0xFF66, // FF66..FF6F; KATAKANA 6269 0xFF70, // FF70 ; COMMON 6270 0xFF71, // FF71..FF9D; KATAKANA 6271 0xFF9E, // FF9E..FF9F; COMMON 6272 0xFFA0, // FFA0..FFBE; HANGUL 6273 0xFFBF, // FFBF..FFC1; UNKNOWN 6274 0xFFC2, // FFC2..FFC7; HANGUL 6275 0xFFC8, // FFC8..FFC9; UNKNOWN 6276 0xFFCA, // FFCA..FFCF; HANGUL 6277 0xFFD0, // FFD0..FFD1; UNKNOWN 6278 0xFFD2, // FFD2..FFD7; HANGUL 6279 0xFFD8, // FFD8..FFD9; UNKNOWN 6280 0xFFDA, // FFDA..FFDC; HANGUL 6281 0xFFDD, // FFDD..FFDF; UNKNOWN 6282 0xFFE0, // FFE0..FFE6; COMMON 6283 0xFFE7, // FFE7 ; UNKNOWN 6284 0xFFE8, // FFE8..FFEE; COMMON 6285 0xFFEF, // FFEF..FFF8; UNKNOWN 6286 0xFFF9, // FFF9..FFFD; COMMON 6287 0xFFFE, // FFFE..FFFF; UNKNOWN 6288 0x10000, // 10000..1000B; LINEAR_B 6289 0x1000C, // 1000C ; UNKNOWN 6290 0x1000D, // 1000D..10026; LINEAR_B 6291 0x10027, // 10027 ; UNKNOWN 6292 0x10028, // 10028..1003A; LINEAR_B 6293 0x1003B, // 1003B ; UNKNOWN 6294 0x1003C, // 1003C..1003D; LINEAR_B 6295 0x1003E, // 1003E ; UNKNOWN 6296 0x1003F, // 1003F..1004D; LINEAR_B 6297 0x1004E, // 1004E..1004F; UNKNOWN 6298 0x10050, // 10050..1005D; LINEAR_B 6299 0x1005E, // 1005E..1007F; UNKNOWN 6300 0x10080, // 10080..100FA; LINEAR_B 6301 0x100FB, // 100FB..100FF; UNKNOWN 6302 0x10100, // 10100..10102; COMMON 6303 0x10103, // 10103..10106; UNKNOWN 6304 0x10107, // 10107..10133; COMMON 6305 0x10134, // 10134..10136; UNKNOWN 6306 0x10137, // 10137..1013F; COMMON 6307 0x10140, // 10140..1018E; GREEK 6308 0x1018F, // 1018F ; UNKNOWN 6309 0x10190, // 10190..1019C; COMMON 6310 0x1019D, // 1019D..1019F; UNKNOWN 6311 0x101A0, // 101A0 ; GREEK 6312 0x101A1, // 101A1..101CF; UNKNOWN 6313 0x101D0, // 
101D0..101FC; COMMON 6314 0x101FD, // 101FD ; INHERITED 6315 0x101FE, // 101FE..1027F; UNKNOWN 6316 0x10280, // 10280..1029C; LYCIAN 6317 0x1029D, // 1029D..1029F; UNKNOWN 6318 0x102A0, // 102A0..102D0; CARIAN 6319 0x102D1, // 102D1..102DF; UNKNOWN 6320 0x102E0, // 102E0 ; INHERITED 6321 0x102E1, // 102E1..102FB; COMMON 6322 0x102FC, // 102FC..102FF; UNKNOWN 6323 0x10300, // 10300..10323; OLD_ITALIC 6324 0x10324, // 10324..1032C; UNKNOWN 6325 0x1032D, // 1032D..1032F; OLD_ITALIC 6326 0x10330, // 10330..1034A; GOTHIC 6327 0x1034B, // 1034B..1034F; UNKNOWN 6328 0x10350, // 10350..1037A; OLD_PERMIC 6329 0x1037B, // 1037B..1037F; UNKNOWN 6330 0x10380, // 10380..1039D; UGARITIC 6331 0x1039E, // 1039E ; UNKNOWN 6332 0x1039F, // 1039F ; UGARITIC 6333 0x103A0, // 103A0..103C3; OLD_PERSIAN 6334 0x103C4, // 103C4..103C7; UNKNOWN 6335 0x103C8, // 103C8..103D5; OLD_PERSIAN 6336 0x103D6, // 103D6..103FF; UNKNOWN 6337 0x10400, // 10400..1044F; DESERET 6338 0x10450, // 10450..1047F; SHAVIAN 6339 0x10480, // 10480..1049D; OSMANYA 6340 0x1049E, // 1049E..1049F; UNKNOWN 6341 0x104A0, // 104A0..104A9; OSMANYA 6342 0x104AA, // 104AA..104AF; UNKNOWN 6343 0x104B0, // 104B0..104D3; OSAGE 6344 0x104D4, // 104D4..104D7; UNKNOWN 6345 0x104D8, // 104D8..104FB; OSAGE 6346 0x104FC, // 104FC..104FF; UNKNOWN 6347 0x10500, // 10500..10527; ELBASAN 6348 0x10528, // 10528..1052F; UNKNOWN 6349 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN 6350 0x10564, // 10564..1056E; UNKNOWN 6351 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN 6352 0x10570, // 10570..1057A; VITHKUQI 6353 0x1057B, // 1057B ; UNKNOWN 6354 0x1057C, // 1057C..1058A; VITHKUQI 6355 0x1058B, // 1058B ; UNKNOWN 6356 0x1058C, // 1058C..10592; VITHKUQI 6357 0x10593, // 10593 ; UNKNOWN 6358 0x10594, // 10594..10595; VITHKUQI 6359 0x10596, // 10596 ; UNKNOWN 6360 0x10597, // 10597..105A1; VITHKUQI 6361 0x105A2, // 105A2 ; UNKNOWN 6362 0x105A3, // 105A3..105B1; VITHKUQI 6363 0x105B2, // 105B2 ; UNKNOWN 6364 0x105B3, // 105B3..105B9; VITHKUQI 6365 
0x105BA, // 105BA ; UNKNOWN 6366 0x105BB, // 105BB..105BC; VITHKUQI 6367 0x105BD, // 105BD..105FF; UNKNOWN 6368 0x10600, // 10600..10736; LINEAR_A 6369 0x10737, // 10737..1073F; UNKNOWN 6370 0x10740, // 10740..10755; LINEAR_A 6371 0x10756, // 10756..1075F; UNKNOWN 6372 0x10760, // 10760..10767; LINEAR_A 6373 0x10768, // 10768..1077F; UNKNOWN 6374 0x10780, // 10780..10785; LATIN 6375 0x10786, // 10786 ; UNKNOWN 6376 0x10787, // 10787..107B0; LATIN 6377 0x107B1, // 107B1 ; UNKNOWN 6378 0x107B2, // 107B2..107BA; LATIN 6379 0x107BB, // 107BB..107FF; UNKNOWN 6380 0x10800, // 10800..10805; CYPRIOT 6381 0x10806, // 10806..10807; UNKNOWN 6382 0x10808, // 10808 ; CYPRIOT 6383 0x10809, // 10809 ; UNKNOWN 6384 0x1080A, // 1080A..10835; CYPRIOT 6385 0x10836, // 10836 ; UNKNOWN 6386 0x10837, // 10837..10838; CYPRIOT 6387 0x10839, // 10839..1083B; UNKNOWN 6388 0x1083C, // 1083C ; CYPRIOT 6389 0x1083D, // 1083D..1083E; UNKNOWN 6390 0x1083F, // 1083F ; CYPRIOT 6391 0x10840, // 10840..10855; IMPERIAL_ARAMAIC 6392 0x10856, // 10856 ; UNKNOWN 6393 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC 6394 0x10860, // 10860..1087F; PALMYRENE 6395 0x10880, // 10880..1089E; NABATAEAN 6396 0x1089F, // 1089F..108A6; UNKNOWN 6397 0x108A7, // 108A7..108AF; NABATAEAN 6398 0x108B0, // 108B0..108DF; UNKNOWN 6399 0x108E0, // 108E0..108F2; HATRAN 6400 0x108F3, // 108F3 ; UNKNOWN 6401 0x108F4, // 108F4..108F5; HATRAN 6402 0x108F6, // 108F6..108FA; UNKNOWN 6403 0x108FB, // 108FB..108FF; HATRAN 6404 0x10900, // 10900..1091B; PHOENICIAN 6405 0x1091C, // 1091C..1091E; UNKNOWN 6406 0x1091F, // 1091F ; PHOENICIAN 6407 0x10920, // 10920..10939; LYDIAN 6408 0x1093A, // 1093A..1093E; UNKNOWN 6409 0x1093F, // 1093F ; LYDIAN 6410 0x10940, // 10940..1097F; UNKNOWN 6411 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 6412 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE 6413 0x109B8, // 109B8..109BB; UNKNOWN 6414 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE 6415 0x109D0, // 109D0..109D1; UNKNOWN 6416 0x109D2, // 109D2..109FF; 
MEROITIC_CURSIVE 6417 0x10A00, // 10A00..10A03; KHAROSHTHI 6418 0x10A04, // 10A04 ; UNKNOWN 6419 0x10A05, // 10A05..10A06; KHAROSHTHI 6420 0x10A07, // 10A07..10A0B; UNKNOWN 6421 0x10A0C, // 10A0C..10A13; KHAROSHTHI 6422 0x10A14, // 10A14 ; UNKNOWN 6423 0x10A15, // 10A15..10A17; KHAROSHTHI 6424 0x10A18, // 10A18 ; UNKNOWN 6425 0x10A19, // 10A19..10A35; KHAROSHTHI 6426 0x10A36, // 10A36..10A37; UNKNOWN 6427 0x10A38, // 10A38..10A3A; KHAROSHTHI 6428 0x10A3B, // 10A3B..10A3E; UNKNOWN 6429 0x10A3F, // 10A3F..10A48; KHAROSHTHI 6430 0x10A49, // 10A49..10A4F; UNKNOWN 6431 0x10A50, // 10A50..10A58; KHAROSHTHI 6432 0x10A59, // 10A59..10A5F; UNKNOWN 6433 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN 6434 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN 6435 0x10AA0, // 10AA0..10ABF; UNKNOWN 6436 0x10AC0, // 10AC0..10AE6; MANICHAEAN 6437 0x10AE7, // 10AE7..10AEA; UNKNOWN 6438 0x10AEB, // 10AEB..10AF6; MANICHAEAN 6439 0x10AF7, // 10AF7..10AFF; UNKNOWN 6440 0x10B00, // 10B00..10B35; AVESTAN 6441 0x10B36, // 10B36..10B38; UNKNOWN 6442 0x10B39, // 10B39..10B3F; AVESTAN 6443 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN 6444 0x10B56, // 10B56..10B57; UNKNOWN 6445 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN 6446 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI 6447 0x10B73, // 10B73..10B77; UNKNOWN 6448 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI 6449 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI 6450 0x10B92, // 10B92..10B98; UNKNOWN 6451 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI 6452 0x10B9D, // 10B9D..10BA8; UNKNOWN 6453 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI 6454 0x10BB0, // 10BB0..10BFF; UNKNOWN 6455 0x10C00, // 10C00..10C48; OLD_TURKIC 6456 0x10C49, // 10C49..10C7F; UNKNOWN 6457 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN 6458 0x10CB3, // 10CB3..10CBF; UNKNOWN 6459 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN 6460 0x10CF3, // 10CF3..10CF9; UNKNOWN 6461 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN 6462 0x10D00, // 10D00..10D27; HANIFI_ROHINGYA 6463 0x10D28, // 10D28..10D2F; UNKNOWN 6464 
0x10D30, // 10D30..10D39; HANIFI_ROHINGYA 6465 0x10D3A, // 10D3A..10E5F; UNKNOWN 6466 0x10E60, // 10E60..10E7E; ARABIC 6467 0x10E7F, // 10E7F ; UNKNOWN 6468 0x10E80, // 10E80..10EA9; YEZIDI 6469 0x10EAA, // 10EAA ; UNKNOWN 6470 0x10EAB, // 10EAB..10EAD; YEZIDI 6471 0x10EAE, // 10EAE..10EAF; UNKNOWN 6472 0x10EB0, // 10EB0..10EB1; YEZIDI 6473 0x10EB2, // 10EB2..10EFC; UNKNOWN 6474 0x10EFD, // 10EFD..10EFF; ARABIC 6475 0x10F00, // 10F00..10F27; OLD_SOGDIAN 6476 0x10F28, // 10F28..10F2F; UNKNOWN 6477 0x10F30, // 10F30..10F59; SOGDIAN 6478 0x10F5A, // 10F5A..10F6F; UNKNOWN 6479 0x10F70, // 10F70..10F89; OLD_UYGHUR 6480 0x10F8A, // 10F8A..10FAF; UNKNOWN 6481 0x10FB0, // 10FB0..10FCB; CHORASMIAN 6482 0x10FCC, // 10FCC..10FDF; UNKNOWN 6483 0x10FE0, // 10FE0..10FF6; ELYMAIC 6484 0x10FF7, // 10FF7..10FFF; UNKNOWN 6485 0x11000, // 11000..1104D; BRAHMI 6486 0x1104E, // 1104E..11051; UNKNOWN 6487 0x11052, // 11052..11075; BRAHMI 6488 0x11076, // 11076..1107E; UNKNOWN 6489 0x1107F, // 1107F ; BRAHMI 6490 0x11080, // 11080..110C2; KAITHI 6491 0x110C3, // 110C3..110CC; UNKNOWN 6492 0x110CD, // 110CD ; KAITHI 6493 0x110CE, // 110CE..110CF; UNKNOWN 6494 0x110D0, // 110D0..110E8; SORA_SOMPENG 6495 0x110E9, // 110E9..110EF; UNKNOWN 6496 0x110F0, // 110F0..110F9; SORA_SOMPENG 6497 0x110FA, // 110FA..110FF; UNKNOWN 6498 0x11100, // 11100..11134; CHAKMA 6499 0x11135, // 11135 ; UNKNOWN 6500 0x11136, // 11136..11147; CHAKMA 6501 0x11148, // 11148..1114F; UNKNOWN 6502 0x11150, // 11150..11176; MAHAJANI 6503 0x11177, // 11177..1117F; UNKNOWN 6504 0x11180, // 11180..111DF; SHARADA 6505 0x111E0, // 111E0 ; UNKNOWN 6506 0x111E1, // 111E1..111F4; SINHALA 6507 0x111F5, // 111F5..111FF; UNKNOWN 6508 0x11200, // 11200..11211; KHOJKI 6509 0x11212, // 11212 ; UNKNOWN 6510 0x11213, // 11213..11241; KHOJKI 6511 0x11242, // 11242..1127F; UNKNOWN 6512 0x11280, // 11280..11286; MULTANI 6513 0x11287, // 11287 ; UNKNOWN 6514 0x11288, // 11288 ; MULTANI 6515 0x11289, // 11289 ; UNKNOWN 6516 0x1128A, // 
1128A..1128D; MULTANI 6517 0x1128E, // 1128E ; UNKNOWN 6518 0x1128F, // 1128F..1129D; MULTANI 6519 0x1129E, // 1129E ; UNKNOWN 6520 0x1129F, // 1129F..112A9; MULTANI 6521 0x112AA, // 112AA..112AF; UNKNOWN 6522 0x112B0, // 112B0..112EA; KHUDAWADI 6523 0x112EB, // 112EB..112EF; UNKNOWN 6524 0x112F0, // 112F0..112F9; KHUDAWADI 6525 0x112FA, // 112FA..112FF; UNKNOWN 6526 0x11300, // 11300..11303; GRANTHA 6527 0x11304, // 11304 ; UNKNOWN 6528 0x11305, // 11305..1130C; GRANTHA 6529 0x1130D, // 1130D..1130E; UNKNOWN 6530 0x1130F, // 1130F..11310; GRANTHA 6531 0x11311, // 11311..11312; UNKNOWN 6532 0x11313, // 11313..11328; GRANTHA 6533 0x11329, // 11329 ; UNKNOWN 6534 0x1132A, // 1132A..11330; GRANTHA 6535 0x11331, // 11331 ; UNKNOWN 6536 0x11332, // 11332..11333; GRANTHA 6537 0x11334, // 11334 ; UNKNOWN 6538 0x11335, // 11335..11339; GRANTHA 6539 0x1133A, // 1133A ; UNKNOWN 6540 0x1133B, // 1133B ; INHERITED 6541 0x1133C, // 1133C..11344; GRANTHA 6542 0x11345, // 11345..11346; UNKNOWN 6543 0x11347, // 11347..11348; GRANTHA 6544 0x11349, // 11349..1134A; UNKNOWN 6545 0x1134B, // 1134B..1134D; GRANTHA 6546 0x1134E, // 1134E..1134F; UNKNOWN 6547 0x11350, // 11350 ; GRANTHA 6548 0x11351, // 11351..11356; UNKNOWN 6549 0x11357, // 11357 ; GRANTHA 6550 0x11358, // 11358..1135C; UNKNOWN 6551 0x1135D, // 1135D..11363; GRANTHA 6552 0x11364, // 11364..11365; UNKNOWN 6553 0x11366, // 11366..1136C; GRANTHA 6554 0x1136D, // 1136D..1136F; UNKNOWN 6555 0x11370, // 11370..11374; GRANTHA 6556 0x11375, // 11375..113FF; UNKNOWN 6557 0x11400, // 11400..1145B; NEWA 6558 0x1145C, // 1145C ; UNKNOWN 6559 0x1145D, // 1145D..11461; NEWA 6560 0x11462, // 11462..1147F; UNKNOWN 6561 0x11480, // 11480..114C7; TIRHUTA 6562 0x114C8, // 114C8..114CF; UNKNOWN 6563 0x114D0, // 114D0..114D9; TIRHUTA 6564 0x114DA, // 114DA..1157F; UNKNOWN 6565 0x11580, // 11580..115B5; SIDDHAM 6566 0x115B6, // 115B6..115B7; UNKNOWN 6567 0x115B8, // 115B8..115DD; SIDDHAM 6568 0x115DE, // 115DE..115FF; UNKNOWN 6569 0x11600, 
// 11600..11644; MODI 6570 0x11645, // 11645..1164F; UNKNOWN 6571 0x11650, // 11650..11659; MODI 6572 0x1165A, // 1165A..1165F; UNKNOWN 6573 0x11660, // 11660..1166C; MONGOLIAN 6574 0x1166D, // 1166D..1167F; UNKNOWN 6575 0x11680, // 11680..116B9; TAKRI 6576 0x116BA, // 116BA..116BF; UNKNOWN 6577 0x116C0, // 116C0..116C9; TAKRI 6578 0x116CA, // 116CA..116FF; UNKNOWN 6579 0x11700, // 11700..1171A; AHOM 6580 0x1171B, // 1171B..1171C; UNKNOWN 6581 0x1171D, // 1171D..1172B; AHOM 6582 0x1172C, // 1172C..1172F; UNKNOWN 6583 0x11730, // 11730..11746; AHOM 6584 0x11747, // 11747..117FF; UNKNOWN 6585 0x11800, // 11800..1183B; DOGRA 6586 0x1183C, // 1183C..1189F; UNKNOWN 6587 0x118A0, // 118A0..118F2; WARANG_CITI 6588 0x118F3, // 118F3..118FE; UNKNOWN 6589 0x118FF, // 118FF ; WARANG_CITI 6590 0x11900, // 11900..11906; DIVES_AKURU 6591 0x11907, // 11907..11908; UNKNOWN 6592 0x11909, // 11909 ; DIVES_AKURU 6593 0x1190A, // 1190A..1190B; UNKNOWN 6594 0x1190C, // 1190C..11913; DIVES_AKURU 6595 0x11914, // 11914 ; UNKNOWN 6596 0x11915, // 11915..11916; DIVES_AKURU 6597 0x11917, // 11917 ; UNKNOWN 6598 0x11918, // 11918..11935; DIVES_AKURU 6599 0x11936, // 11936 ; UNKNOWN 6600 0x11937, // 11937..11938; DIVES_AKURU 6601 0x11939, // 11939..1193A; UNKNOWN 6602 0x1193B, // 1193B..11946; DIVES_AKURU 6603 0x11947, // 11947..1194F; UNKNOWN 6604 0x11950, // 11950..11959; DIVES_AKURU 6605 0x1195A, // 1195A..1199F; UNKNOWN 6606 0x119A0, // 119A0..119A7; NANDINAGARI 6607 0x119A8, // 119A8..119A9; UNKNOWN 6608 0x119AA, // 119AA..119D7; NANDINAGARI 6609 0x119D8, // 119D8..119D9; UNKNOWN 6610 0x119DA, // 119DA..119E4; NANDINAGARI 6611 0x119E5, // 119E5..119FF; UNKNOWN 6612 0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE 6613 0x11A48, // 11A48..11A4F; UNKNOWN 6614 0x11A50, // 11A50..11AA2; SOYOMBO 6615 0x11AA3, // 11AA3..11AAF; UNKNOWN 6616 0x11AB0, // 11AB0..11ABF; CANADIAN_ABORIGINAL 6617 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU 6618 0x11AF9, // 11AF9..11AFF; UNKNOWN 6619 0x11B00, // 11B00..11B09; 
DEVANAGARI 6620 0x11B0A, // 11B0A..11BFF; UNKNOWN 6621 0x11C00, // 11C00..11C08; BHAIKSUKI 6622 0x11C09, // 11C09 ; UNKNOWN 6623 0x11C0A, // 11C0A..11C36; BHAIKSUKI 6624 0x11C37, // 11C37 ; UNKNOWN 6625 0x11C38, // 11C38..11C45; BHAIKSUKI 6626 0x11C46, // 11C46..11C4F; UNKNOWN 6627 0x11C50, // 11C50..11C6C; BHAIKSUKI 6628 0x11C6D, // 11C6D..11C6F; UNKNOWN 6629 0x11C70, // 11C70..11C8F; MARCHEN 6630 0x11C90, // 11C90..11C91; UNKNOWN 6631 0x11C92, // 11C92..11CA7; MARCHEN 6632 0x11CA8, // 11CA8 ; UNKNOWN 6633 0x11CA9, // 11CA9..11CB6; MARCHEN 6634 0x11CB7, // 11CB7..11CFF; UNKNOWN 6635 0x11D00, // 11D00..11D06; MASARAM_GONDI 6636 0x11D07, // 11D07 ; UNKNOWN 6637 0x11D08, // 11D08..11D09; MASARAM_GONDI 6638 0x11D0A, // 11D0A ; UNKNOWN 6639 0x11D0B, // 11D0B..11D36; MASARAM_GONDI 6640 0x11D37, // 11D37..11D39; UNKNOWN 6641 0x11D3A, // 11D3A ; MASARAM_GONDI 6642 0x11D3B, // 11D3B ; UNKNOWN 6643 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI 6644 0x11D3E, // 11D3E ; UNKNOWN 6645 0x11D3F, // 11D3F..11D47; MASARAM_GONDI 6646 0x11D48, // 11D48..11D4F; UNKNOWN 6647 0x11D50, // 11D50..11D59; MASARAM_GONDI 6648 0x11D5A, // 11D5A..11D5F; UNKNOWN 6649 0x11D60, // 11D60..11D65; GUNJALA_GONDI 6650 0x11D66, // 11D66 ; UNKNOWN 6651 0x11D67, // 11D67..11D68; GUNJALA_GONDI 6652 0x11D69, // 11D69 ; UNKNOWN 6653 0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI 6654 0x11D8F, // 11D8F ; UNKNOWN 6655 0x11D90, // 11D90..11D91; GUNJALA_GONDI 6656 0x11D92, // 11D92 ; UNKNOWN 6657 0x11D93, // 11D93..11D98; GUNJALA_GONDI 6658 0x11D99, // 11D99..11D9F; UNKNOWN 6659 0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI 6660 0x11DAA, // 11DAA..11EDF; UNKNOWN 6661 0x11EE0, // 11EE0..11EF8; MAKASAR 6662 0x11EF9, // 11EF9..11EFF; UNKNOWN 6663 0x11F00, // 11F00..11F10; KAWI 6664 0x11F11, // 11F11 ; UNKNOWN 6665 0x11F12, // 11F12..11F3A; KAWI 6666 0x11F3B, // 11F3B..11F3D; UNKNOWN 6667 0x11F3E, // 11F3E..11F59; KAWI 6668 0x11F5A, // 11F5A..11FAF; UNKNOWN 6669 0x11FB0, // 11FB0 ; LISU 6670 0x11FB1, // 11FB1..11FBF; UNKNOWN 6671 
0x11FC0, // 11FC0..11FF1; TAMIL 6672 0x11FF2, // 11FF2..11FFE; UNKNOWN 6673 0x11FFF, // 11FFF ; TAMIL 6674 0x12000, // 12000..12399; CUNEIFORM 6675 0x1239A, // 1239A..123FF; UNKNOWN 6676 0x12400, // 12400..1246E; CUNEIFORM 6677 0x1246F, // 1246F ; UNKNOWN 6678 0x12470, // 12470..12474; CUNEIFORM 6679 0x12475, // 12475..1247F; UNKNOWN 6680 0x12480, // 12480..12543; CUNEIFORM 6681 0x12544, // 12544..12F8F; UNKNOWN 6682 0x12F90, // 12F90..12FF2; CYPRO_MINOAN 6683 0x12FF3, // 12FF3..12FFF; UNKNOWN 6684 0x13000, // 13000..13455; EGYPTIAN_HIEROGLYPHS 6685 0x13456, // 13456..143FF; UNKNOWN 6686 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS 6687 0x14647, // 14647..167FF; UNKNOWN 6688 0x16800, // 16800..16A38; BAMUM 6689 0x16A39, // 16A39..16A3F; UNKNOWN 6690 0x16A40, // 16A40..16A5E; MRO 6691 0x16A5F, // 16A5F ; UNKNOWN 6692 0x16A60, // 16A60..16A69; MRO 6693 0x16A6A, // 16A6A..16A6D; UNKNOWN 6694 0x16A6E, // 16A6E..16A6F; MRO 6695 0x16A70, // 16A70..16ABE; TANGSA 6696 0x16ABF, // 16ABF ; UNKNOWN 6697 0x16AC0, // 16AC0..16AC9; TANGSA 6698 0x16ACA, // 16ACA..16ACF; UNKNOWN 6699 0x16AD0, // 16AD0..16AED; BASSA_VAH 6700 0x16AEE, // 16AEE..16AEF; UNKNOWN 6701 0x16AF0, // 16AF0..16AF5; BASSA_VAH 6702 0x16AF6, // 16AF6..16AFF; UNKNOWN 6703 0x16B00, // 16B00..16B45; PAHAWH_HMONG 6704 0x16B46, // 16B46..16B4F; UNKNOWN 6705 0x16B50, // 16B50..16B59; PAHAWH_HMONG 6706 0x16B5A, // 16B5A ; UNKNOWN 6707 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG 6708 0x16B62, // 16B62 ; UNKNOWN 6709 0x16B63, // 16B63..16B77; PAHAWH_HMONG 6710 0x16B78, // 16B78..16B7C; UNKNOWN 6711 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG 6712 0x16B90, // 16B90..16E3F; UNKNOWN 6713 0x16E40, // 16E40..16E9A; MEDEFAIDRIN 6714 0x16E9B, // 16E9B..16EFF; UNKNOWN 6715 0x16F00, // 16F00..16F4A; MIAO 6716 0x16F4B, // 16F4B..16F4E; UNKNOWN 6717 0x16F4F, // 16F4F..16F87; MIAO 6718 0x16F88, // 16F88..16F8E; UNKNOWN 6719 0x16F8F, // 16F8F..16F9F; MIAO 6720 0x16FA0, // 16FA0..16FDF; UNKNOWN 6721 0x16FE0, // 16FE0 ; TANGUT 6722 0x16FE1, 
// 16FE1 ; NUSHU 6723 0x16FE2, // 16FE2..16FE3; HAN 6724 0x16FE4, // 16FE4 ; KHITAN_SMALL_SCRIPT 6725 0x16FE5, // 16FE5..16FEF; UNKNOWN 6726 0x16FF0, // 16FF0..16FF1; HAN 6727 0x16FF2, // 16FF2..16FFF; UNKNOWN 6728 0x17000, // 17000..187F7; TANGUT 6729 0x187F8, // 187F8..187FF; UNKNOWN 6730 0x18800, // 18800..18AFF; TANGUT 6731 0x18B00, // 18B00..18CD5; KHITAN_SMALL_SCRIPT 6732 0x18CD6, // 18CD6..18CFF; UNKNOWN 6733 0x18D00, // 18D00..18D08; TANGUT 6734 0x18D09, // 18D09..1AFEF; UNKNOWN 6735 0x1AFF0, // 1AFF0..1AFF3; KATAKANA 6736 0x1AFF4, // 1AFF4 ; UNKNOWN 6737 0x1AFF5, // 1AFF5..1AFFB; KATAKANA 6738 0x1AFFC, // 1AFFC ; UNKNOWN 6739 0x1AFFD, // 1AFFD..1AFFE; KATAKANA 6740 0x1AFFF, // 1AFFF ; UNKNOWN 6741 0x1B000, // 1B000 ; KATAKANA 6742 0x1B001, // 1B001..1B11F; HIRAGANA 6743 0x1B120, // 1B120..1B122; KATAKANA 6744 0x1B123, // 1B123..1B131; UNKNOWN 6745 0x1B132, // 1B132 ; HIRAGANA 6746 0x1B133, // 1B133..1B14F; UNKNOWN 6747 0x1B150, // 1B150..1B152; HIRAGANA 6748 0x1B153, // 1B153..1B154; UNKNOWN 6749 0x1B155, // 1B155 ; KATAKANA 6750 0x1B156, // 1B156..1B163; UNKNOWN 6751 0x1B164, // 1B164..1B167; KATAKANA 6752 0x1B168, // 1B168..1B16F; UNKNOWN 6753 0x1B170, // 1B170..1B2FB; NUSHU 6754 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN 6755 0x1BC00, // 1BC00..1BC6A; DUPLOYAN 6756 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN 6757 0x1BC70, // 1BC70..1BC7C; DUPLOYAN 6758 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN 6759 0x1BC80, // 1BC80..1BC88; DUPLOYAN 6760 0x1BC89, // 1BC89..1BC8F; UNKNOWN 6761 0x1BC90, // 1BC90..1BC99; DUPLOYAN 6762 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN 6763 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN 6764 0x1BCA0, // 1BCA0..1BCA3; COMMON 6765 0x1BCA4, // 1BCA4..1CEFF; UNKNOWN 6766 0x1CF00, // 1CF00..1CF2D; INHERITED 6767 0x1CF2E, // 1CF2E..1CF2F; UNKNOWN 6768 0x1CF30, // 1CF30..1CF46; INHERITED 6769 0x1CF47, // 1CF47..1CF4F; UNKNOWN 6770 0x1CF50, // 1CF50..1CFC3; COMMON 6771 0x1CFC4, // 1CFC4..1CFFF; UNKNOWN 6772 0x1D000, // 1D000..1D0F5; COMMON 6773 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN 6774 
0x1D100, // 1D100..1D126; COMMON 6775 0x1D127, // 1D127..1D128; UNKNOWN 6776 0x1D129, // 1D129..1D166; COMMON 6777 0x1D167, // 1D167..1D169; INHERITED 6778 0x1D16A, // 1D16A..1D17A; COMMON 6779 0x1D17B, // 1D17B..1D182; INHERITED 6780 0x1D183, // 1D183..1D184; COMMON 6781 0x1D185, // 1D185..1D18B; INHERITED 6782 0x1D18C, // 1D18C..1D1A9; COMMON 6783 0x1D1AA, // 1D1AA..1D1AD; INHERITED 6784 0x1D1AE, // 1D1AE..1D1EA; COMMON 6785 0x1D1EB, // 1D1EB..1D1FF; UNKNOWN 6786 0x1D200, // 1D200..1D245; GREEK 6787 0x1D246, // 1D246..1D2BF; UNKNOWN 6788 0x1D2C0, // 1D2C0..1D2D3; COMMON 6789 0x1D2D4, // 1D2D4..1D2DF; UNKNOWN 6790 0x1D2E0, // 1D2E0..1D2F3; COMMON 6791 0x1D2F4, // 1D2F4..1D2FF; UNKNOWN 6792 0x1D300, // 1D300..1D356; COMMON 6793 0x1D357, // 1D357..1D35F; UNKNOWN 6794 0x1D360, // 1D360..1D378; COMMON 6795 0x1D379, // 1D379..1D3FF; UNKNOWN 6796 0x1D400, // 1D400..1D454; COMMON 6797 0x1D455, // 1D455 ; UNKNOWN 6798 0x1D456, // 1D456..1D49C; COMMON 6799 0x1D49D, // 1D49D ; UNKNOWN 6800 0x1D49E, // 1D49E..1D49F; COMMON 6801 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN 6802 0x1D4A2, // 1D4A2 ; COMMON 6803 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN 6804 0x1D4A5, // 1D4A5..1D4A6; COMMON 6805 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN 6806 0x1D4A9, // 1D4A9..1D4AC; COMMON 6807 0x1D4AD, // 1D4AD ; UNKNOWN 6808 0x1D4AE, // 1D4AE..1D4B9; COMMON 6809 0x1D4BA, // 1D4BA ; UNKNOWN 6810 0x1D4BB, // 1D4BB ; COMMON 6811 0x1D4BC, // 1D4BC ; UNKNOWN 6812 0x1D4BD, // 1D4BD..1D4C3; COMMON 6813 0x1D4C4, // 1D4C4 ; UNKNOWN 6814 0x1D4C5, // 1D4C5..1D505; COMMON 6815 0x1D506, // 1D506 ; UNKNOWN 6816 0x1D507, // 1D507..1D50A; COMMON 6817 0x1D50B, // 1D50B..1D50C; UNKNOWN 6818 0x1D50D, // 1D50D..1D514; COMMON 6819 0x1D515, // 1D515 ; UNKNOWN 6820 0x1D516, // 1D516..1D51C; COMMON 6821 0x1D51D, // 1D51D ; UNKNOWN 6822 0x1D51E, // 1D51E..1D539; COMMON 6823 0x1D53A, // 1D53A ; UNKNOWN 6824 0x1D53B, // 1D53B..1D53E; COMMON 6825 0x1D53F, // 1D53F ; UNKNOWN 6826 0x1D540, // 1D540..1D544; COMMON 6827 0x1D545, // 1D545 ; UNKNOWN 
6828 0x1D546, // 1D546 ; COMMON 6829 0x1D547, // 1D547..1D549; UNKNOWN 6830 0x1D54A, // 1D54A..1D550; COMMON 6831 0x1D551, // 1D551 ; UNKNOWN 6832 0x1D552, // 1D552..1D6A5; COMMON 6833 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN 6834 0x1D6A8, // 1D6A8..1D7CB; COMMON 6835 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN 6836 0x1D7CE, // 1D7CE..1D7FF; COMMON 6837 0x1D800, // 1D800..1DA8B; SIGNWRITING 6838 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN 6839 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING 6840 0x1DAA0, // 1DAA0 ; UNKNOWN 6841 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING 6842 0x1DAB0, // 1DAB0..1DEFF; UNKNOWN 6843 0x1DF00, // 1DF00..1DF1E; LATIN 6844 0x1DF1F, // 1DF1F..1DF24; UNKNOWN 6845 0x1DF25, // 1DF25..1DF2A; LATIN 6846 0x1DF2B, // 1DF2B..1DFFF; UNKNOWN 6847 0x1E000, // 1E000..1E006; GLAGOLITIC 6848 0x1E007, // 1E007 ; UNKNOWN 6849 0x1E008, // 1E008..1E018; GLAGOLITIC 6850 0x1E019, // 1E019..1E01A; UNKNOWN 6851 0x1E01B, // 1E01B..1E021; GLAGOLITIC 6852 0x1E022, // 1E022 ; UNKNOWN 6853 0x1E023, // 1E023..1E024; GLAGOLITIC 6854 0x1E025, // 1E025 ; UNKNOWN 6855 0x1E026, // 1E026..1E02A; GLAGOLITIC 6856 0x1E02B, // 1E02B..1E02F; UNKNOWN 6857 0x1E030, // 1E030..1E06D; CYRILLIC 6858 0x1E06E, // 1E06E..1E08E; UNKNOWN 6859 0x1E08F, // 1E08F ; CYRILLIC 6860 0x1E090, // 1E090..1E0FF; UNKNOWN 6861 0x1E100, // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG 6862 0x1E12D, // 1E12D..1E12F; UNKNOWN 6863 0x1E130, // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG 6864 0x1E13E, // 1E13E..1E13F; UNKNOWN 6865 0x1E140, // 1E140..1E149; NYIAKENG_PUACHUE_HMONG 6866 0x1E14A, // 1E14A..1E14D; UNKNOWN 6867 0x1E14E, // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG 6868 0x1E150, // 1E150..1E28F; UNKNOWN 6869 0x1E290, // 1E290..1E2AE; TOTO 6870 0x1E2AF, // 1E2AF..1E2BF; UNKNOWN 6871 0x1E2C0, // 1E2C0..1E2F9; WANCHO 6872 0x1E2FA, // 1E2FA..1E2FE; UNKNOWN 6873 0x1E2FF, // 1E2FF ; WANCHO 6874 0x1E300, // 1E300..1E4CF; UNKNOWN 6875 0x1E4D0, // 1E4D0..1E4F9; NAG_MUNDARI 6876 0x1E4FA, // 1E4FA..1E7DF; UNKNOWN 6877 0x1E7E0, // 1E7E0..1E7E6; ETHIOPIC 6878 0x1E7E7, // 
1E7E7 ; UNKNOWN 6879 0x1E7E8, // 1E7E8..1E7EB; ETHIOPIC 6880 0x1E7EC, // 1E7EC ; UNKNOWN 6881 0x1E7ED, // 1E7ED..1E7EE; ETHIOPIC 6882 0x1E7EF, // 1E7EF ; UNKNOWN 6883 0x1E7F0, // 1E7F0..1E7FE; ETHIOPIC 6884 0x1E7FF, // 1E7FF ; UNKNOWN 6885 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI 6886 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN 6887 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI 6888 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN 6889 0x1E900, // 1E900..1E94B; ADLAM 6890 0x1E94C, // 1E94C..1E94F; UNKNOWN 6891 0x1E950, // 1E950..1E959; ADLAM 6892 0x1E95A, // 1E95A..1E95D; UNKNOWN 6893 0x1E95E, // 1E95E..1E95F; ADLAM 6894 0x1E960, // 1E960..1EC70; UNKNOWN 6895 0x1EC71, // 1EC71..1ECB4; COMMON 6896 0x1ECB5, // 1ECB5..1ED00; UNKNOWN 6897 0x1ED01, // 1ED01..1ED3D; COMMON 6898 0x1ED3E, // 1ED3E..1EDFF; UNKNOWN 6899 0x1EE00, // 1EE00..1EE03; ARABIC 6900 0x1EE04, // 1EE04 ; UNKNOWN 6901 0x1EE05, // 1EE05..1EE1F; ARABIC 6902 0x1EE20, // 1EE20 ; UNKNOWN 6903 0x1EE21, // 1EE21..1EE22; ARABIC 6904 0x1EE23, // 1EE23 ; UNKNOWN 6905 0x1EE24, // 1EE24 ; ARABIC 6906 0x1EE25, // 1EE25..1EE26; UNKNOWN 6907 0x1EE27, // 1EE27 ; ARABIC 6908 0x1EE28, // 1EE28 ; UNKNOWN 6909 0x1EE29, // 1EE29..1EE32; ARABIC 6910 0x1EE33, // 1EE33 ; UNKNOWN 6911 0x1EE34, // 1EE34..1EE37; ARABIC 6912 0x1EE38, // 1EE38 ; UNKNOWN 6913 0x1EE39, // 1EE39 ; ARABIC 6914 0x1EE3A, // 1EE3A ; UNKNOWN 6915 0x1EE3B, // 1EE3B ; ARABIC 6916 0x1EE3C, // 1EE3C..1EE41; UNKNOWN 6917 0x1EE42, // 1EE42 ; ARABIC 6918 0x1EE43, // 1EE43..1EE46; UNKNOWN 6919 0x1EE47, // 1EE47 ; ARABIC 6920 0x1EE48, // 1EE48 ; UNKNOWN 6921 0x1EE49, // 1EE49 ; ARABIC 6922 0x1EE4A, // 1EE4A ; UNKNOWN 6923 0x1EE4B, // 1EE4B ; ARABIC 6924 0x1EE4C, // 1EE4C ; UNKNOWN 6925 0x1EE4D, // 1EE4D..1EE4F; ARABIC 6926 0x1EE50, // 1EE50 ; UNKNOWN 6927 0x1EE51, // 1EE51..1EE52; ARABIC 6928 0x1EE53, // 1EE53 ; UNKNOWN 6929 0x1EE54, // 1EE54 ; ARABIC 6930 0x1EE55, // 1EE55..1EE56; UNKNOWN 6931 0x1EE57, // 1EE57 ; ARABIC 6932 0x1EE58, // 1EE58 ; UNKNOWN 6933 0x1EE59, // 1EE59 ; ARABIC 6934 
0x1EE5A, // 1EE5A ; UNKNOWN 6935 0x1EE5B, // 1EE5B ; ARABIC 6936 0x1EE5C, // 1EE5C ; UNKNOWN 6937 0x1EE5D, // 1EE5D ; ARABIC 6938 0x1EE5E, // 1EE5E ; UNKNOWN 6939 0x1EE5F, // 1EE5F ; ARABIC 6940 0x1EE60, // 1EE60 ; UNKNOWN 6941 0x1EE61, // 1EE61..1EE62; ARABIC 6942 0x1EE63, // 1EE63 ; UNKNOWN 6943 0x1EE64, // 1EE64 ; ARABIC 6944 0x1EE65, // 1EE65..1EE66; UNKNOWN 6945 0x1EE67, // 1EE67..1EE6A; ARABIC 6946 0x1EE6B, // 1EE6B ; UNKNOWN 6947 0x1EE6C, // 1EE6C..1EE72; ARABIC 6948 0x1EE73, // 1EE73 ; UNKNOWN 6949 0x1EE74, // 1EE74..1EE77; ARABIC 6950 0x1EE78, // 1EE78 ; UNKNOWN 6951 0x1EE79, // 1EE79..1EE7C; ARABIC 6952 0x1EE7D, // 1EE7D ; UNKNOWN 6953 0x1EE7E, // 1EE7E ; ARABIC 6954 0x1EE7F, // 1EE7F ; UNKNOWN 6955 0x1EE80, // 1EE80..1EE89; ARABIC 6956 0x1EE8A, // 1EE8A ; UNKNOWN 6957 0x1EE8B, // 1EE8B..1EE9B; ARABIC 6958 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN 6959 0x1EEA1, // 1EEA1..1EEA3; ARABIC 6960 0x1EEA4, // 1EEA4 ; UNKNOWN 6961 0x1EEA5, // 1EEA5..1EEA9; ARABIC 6962 0x1EEAA, // 1EEAA ; UNKNOWN 6963 0x1EEAB, // 1EEAB..1EEBB; ARABIC 6964 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN 6965 0x1EEF0, // 1EEF0..1EEF1; ARABIC 6966 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN 6967 0x1F000, // 1F000..1F02B; COMMON 6968 0x1F02C, // 1F02C..1F02F; UNKNOWN 6969 0x1F030, // 1F030..1F093; COMMON 6970 0x1F094, // 1F094..1F09F; UNKNOWN 6971 0x1F0A0, // 1F0A0..1F0AE; COMMON 6972 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN 6973 0x1F0B1, // 1F0B1..1F0BF; COMMON 6974 0x1F0C0, // 1F0C0 ; UNKNOWN 6975 0x1F0C1, // 1F0C1..1F0CF; COMMON 6976 0x1F0D0, // 1F0D0 ; UNKNOWN 6977 0x1F0D1, // 1F0D1..1F0F5; COMMON 6978 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN 6979 0x1F100, // 1F100..1F1AD; COMMON 6980 0x1F1AE, // 1F1AE..1F1E5; UNKNOWN 6981 0x1F1E6, // 1F1E6..1F1FF; COMMON 6982 0x1F200, // 1F200 ; HIRAGANA 6983 0x1F201, // 1F201..1F202; COMMON 6984 0x1F203, // 1F203..1F20F; UNKNOWN 6985 0x1F210, // 1F210..1F23B; COMMON 6986 0x1F23C, // 1F23C..1F23F; UNKNOWN 6987 0x1F240, // 1F240..1F248; COMMON 6988 0x1F249, // 1F249..1F24F; UNKNOWN 6989 
0x1F250, // 1F250..1F251; COMMON 6990 0x1F252, // 1F252..1F25F; UNKNOWN 6991 0x1F260, // 1F260..1F265; COMMON 6992 0x1F266, // 1F266..1F2FF; UNKNOWN 6993 0x1F300, // 1F300..1F6D7; COMMON 6994 0x1F6D8, // 1F6D8..1F6DB; UNKNOWN 6995 0x1F6DC, // 1F6DC..1F6EC; COMMON 6996 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN 6997 0x1F6F0, // 1F6F0..1F6FC; COMMON 6998 0x1F6FD, // 1F6FD..1F6FF; UNKNOWN 6999 0x1F700, // 1F700..1F776; COMMON 7000 0x1F777, // 1F777..1F77A; UNKNOWN 7001 0x1F77B, // 1F77B..1F7D9; COMMON 7002 0x1F7DA, // 1F7DA..1F7DF; UNKNOWN 7003 0x1F7E0, // 1F7E0..1F7EB; COMMON 7004 0x1F7EC, // 1F7EC..1F7EF; UNKNOWN 7005 0x1F7F0, // 1F7F0 ; COMMON 7006 0x1F7F1, // 1F7F1..1F7FF; UNKNOWN 7007 0x1F800, // 1F800..1F80B; COMMON 7008 0x1F80C, // 1F80C..1F80F; UNKNOWN 7009 0x1F810, // 1F810..1F847; COMMON 7010 0x1F848, // 1F848..1F84F; UNKNOWN 7011 0x1F850, // 1F850..1F859; COMMON 7012 0x1F85A, // 1F85A..1F85F; UNKNOWN 7013 0x1F860, // 1F860..1F887; COMMON 7014 0x1F888, // 1F888..1F88F; UNKNOWN 7015 0x1F890, // 1F890..1F8AD; COMMON 7016 0x1F8AE, // 1F8AE..1F8AF; UNKNOWN 7017 0x1F8B0, // 1F8B0..1F8B1; COMMON 7018 0x1F8B2, // 1F8B2..1F8FF; UNKNOWN 7019 0x1F900, // 1F900..1FA53; COMMON 7020 0x1FA54, // 1FA54..1FA5F; UNKNOWN 7021 0x1FA60, // 1FA60..1FA6D; COMMON 7022 0x1FA6E, // 1FA6E..1FA6F; UNKNOWN 7023 0x1FA70, // 1FA70..1FA7C; COMMON 7024 0x1FA7D, // 1FA7D..1FA7F; UNKNOWN 7025 0x1FA80, // 1FA80..1FA88; COMMON 7026 0x1FA89, // 1FA89..1FA8F; UNKNOWN 7027 0x1FA90, // 1FA90..1FABD; COMMON 7028 0x1FABE, // 1FABE ; UNKNOWN 7029 0x1FABF, // 1FABF..1FAC5; COMMON 7030 0x1FAC6, // 1FAC6..1FACD; UNKNOWN 7031 0x1FACE, // 1FACE..1FADB; COMMON 7032 0x1FADC, // 1FADC..1FADF; UNKNOWN 7033 0x1FAE0, // 1FAE0..1FAE8; COMMON 7034 0x1FAE9, // 1FAE9..1FAEF; UNKNOWN 7035 0x1FAF0, // 1FAF0..1FAF8; COMMON 7036 0x1FAF9, // 1FAF9..1FAFF; UNKNOWN 7037 0x1FB00, // 1FB00..1FB92; COMMON 7038 0x1FB93, // 1FB93 ; UNKNOWN 7039 0x1FB94, // 1FB94..1FBCA; COMMON 7040 0x1FBCB, // 1FBCB..1FBEF; UNKNOWN 7041 0x1FBF0, // 
1FBF0..1FBF9; COMMON 7042 0x1FBFA, // 1FBFA..1FFFF; UNKNOWN 7043 0x20000, // 20000..2A6DF; HAN 7044 0x2A6E0, // 2A6E0..2A6FF; UNKNOWN 7045 0x2A700, // 2A700..2B739; HAN 7046 0x2B73A, // 2B73A..2B73F; UNKNOWN 7047 0x2B740, // 2B740..2B81D; HAN 7048 0x2B81E, // 2B81E..2B81F; UNKNOWN 7049 0x2B820, // 2B820..2CEA1; HAN 7050 0x2CEA2, // 2CEA2..2CEAF; UNKNOWN 7051 0x2CEB0, // 2CEB0..2EBE0; HAN 7052 0x2EBE1, // 2EBE1..2EBEF; UNKNOWN 7053 0x2EBF0, // 2EBF0..2EE5D; HAN 7054 0x2EE5E, // 2EE5E..2F7FF; UNKNOWN 7055 0x2F800, // 2F800..2FA1D; HAN 7056 0x2FA1E, // 2FA1E..2FFFF; UNKNOWN 7057 0x30000, // 30000..3134A; HAN 7058 0x3134B, // 3134B..3134F; UNKNOWN 7059 0x31350, // 31350..323AF; HAN 7060 0x323B0, // 323B0..E0000; UNKNOWN 7061 0xE0001, // E0001 ; COMMON 7062 0xE0002, // E0002..E001F; UNKNOWN 7063 0xE0020, // E0020..E007F; COMMON 7064 0xE0080, // E0080..E00FF; UNKNOWN 7065 0xE0100, // E0100..E01EF; INHERITED 7066 0xE01F0, // E01F0..10FFFF; UNKNOWN 7067 }; 7068 7069 private static final UnicodeScript[] scripts = { 7070 COMMON, // 0000..0040 7071 LATIN, // 0041..005A 7072 COMMON, // 005B..0060 7073 LATIN, // 0061..007A 7074 COMMON, // 007B..00A9 7075 LATIN, // 00AA 7076 COMMON, // 00AB..00B9 7077 LATIN, // 00BA 7078 COMMON, // 00BB..00BF 7079 LATIN, // 00C0..00D6 7080 COMMON, // 00D7 7081 LATIN, // 00D8..00F6 7082 COMMON, // 00F7 7083 LATIN, // 00F8..02B8 7084 COMMON, // 02B9..02DF 7085 LATIN, // 02E0..02E4 7086 COMMON, // 02E5..02E9 7087 BOPOMOFO, // 02EA..02EB 7088 COMMON, // 02EC..02FF 7089 INHERITED, // 0300..036F 7090 GREEK, // 0370..0373 7091 COMMON, // 0374 7092 GREEK, // 0375..0377 7093 UNKNOWN, // 0378..0379 7094 GREEK, // 037A..037D 7095 COMMON, // 037E 7096 GREEK, // 037F 7097 UNKNOWN, // 0380..0383 7098 GREEK, // 0384 7099 COMMON, // 0385 7100 GREEK, // 0386 7101 COMMON, // 0387 7102 GREEK, // 0388..038A 7103 UNKNOWN, // 038B 7104 GREEK, // 038C 7105 UNKNOWN, // 038D 7106 GREEK, // 038E..03A1 7107 UNKNOWN, // 03A2 7108 GREEK, // 03A3..03E1 7109 COPTIC, // 
03E2..03EF 7110 GREEK, // 03F0..03FF 7111 CYRILLIC, // 0400..0484 7112 INHERITED, // 0485..0486 7113 CYRILLIC, // 0487..052F 7114 UNKNOWN, // 0530 7115 ARMENIAN, // 0531..0556 7116 UNKNOWN, // 0557..0558 7117 ARMENIAN, // 0559..058A 7118 UNKNOWN, // 058B..058C 7119 ARMENIAN, // 058D..058F 7120 UNKNOWN, // 0590 7121 HEBREW, // 0591..05C7 7122 UNKNOWN, // 05C8..05CF 7123 HEBREW, // 05D0..05EA 7124 UNKNOWN, // 05EB..05EE 7125 HEBREW, // 05EF..05F4 7126 UNKNOWN, // 05F5..05FF 7127 ARABIC, // 0600..0604 7128 COMMON, // 0605 7129 ARABIC, // 0606..060B 7130 COMMON, // 060C 7131 ARABIC, // 060D..061A 7132 COMMON, // 061B 7133 ARABIC, // 061C..061E 7134 COMMON, // 061F 7135 ARABIC, // 0620..063F 7136 COMMON, // 0640 7137 ARABIC, // 0641..064A 7138 INHERITED, // 064B..0655 7139 ARABIC, // 0656..066F 7140 INHERITED, // 0670 7141 ARABIC, // 0671..06DC 7142 COMMON, // 06DD 7143 ARABIC, // 06DE..06FF 7144 SYRIAC, // 0700..070D 7145 UNKNOWN, // 070E 7146 SYRIAC, // 070F..074A 7147 UNKNOWN, // 074B..074C 7148 SYRIAC, // 074D..074F 7149 ARABIC, // 0750..077F 7150 THAANA, // 0780..07B1 7151 UNKNOWN, // 07B2..07BF 7152 NKO, // 07C0..07FA 7153 UNKNOWN, // 07FB..07FC 7154 NKO, // 07FD..07FF 7155 SAMARITAN, // 0800..082D 7156 UNKNOWN, // 082E..082F 7157 SAMARITAN, // 0830..083E 7158 UNKNOWN, // 083F 7159 MANDAIC, // 0840..085B 7160 UNKNOWN, // 085C..085D 7161 MANDAIC, // 085E 7162 UNKNOWN, // 085F 7163 SYRIAC, // 0860..086A 7164 UNKNOWN, // 086B..086F 7165 ARABIC, // 0870..088E 7166 UNKNOWN, // 088F 7167 ARABIC, // 0890..0891 7168 UNKNOWN, // 0892..0897 7169 ARABIC, // 0898..08E1 7170 COMMON, // 08E2 7171 ARABIC, // 08E3..08FF 7172 DEVANAGARI, // 0900..0950 7173 INHERITED, // 0951..0954 7174 DEVANAGARI, // 0955..0963 7175 COMMON, // 0964..0965 7176 DEVANAGARI, // 0966..097F 7177 BENGALI, // 0980..0983 7178 UNKNOWN, // 0984 7179 BENGALI, // 0985..098C 7180 UNKNOWN, // 098D..098E 7181 BENGALI, // 098F..0990 7182 UNKNOWN, // 0991..0992 7183 BENGALI, // 0993..09A8 7184 UNKNOWN, // 09A9 7185 
BENGALI, // 09AA..09B0 7186 UNKNOWN, // 09B1 7187 BENGALI, // 09B2 7188 UNKNOWN, // 09B3..09B5 7189 BENGALI, // 09B6..09B9 7190 UNKNOWN, // 09BA..09BB 7191 BENGALI, // 09BC..09C4 7192 UNKNOWN, // 09C5..09C6 7193 BENGALI, // 09C7..09C8 7194 UNKNOWN, // 09C9..09CA 7195 BENGALI, // 09CB..09CE 7196 UNKNOWN, // 09CF..09D6 7197 BENGALI, // 09D7 7198 UNKNOWN, // 09D8..09DB 7199 BENGALI, // 09DC..09DD 7200 UNKNOWN, // 09DE 7201 BENGALI, // 09DF..09E3 7202 UNKNOWN, // 09E4..09E5 7203 BENGALI, // 09E6..09FE 7204 UNKNOWN, // 09FF..0A00 7205 GURMUKHI, // 0A01..0A03 7206 UNKNOWN, // 0A04 7207 GURMUKHI, // 0A05..0A0A 7208 UNKNOWN, // 0A0B..0A0E 7209 GURMUKHI, // 0A0F..0A10 7210 UNKNOWN, // 0A11..0A12 7211 GURMUKHI, // 0A13..0A28 7212 UNKNOWN, // 0A29 7213 GURMUKHI, // 0A2A..0A30 7214 UNKNOWN, // 0A31 7215 GURMUKHI, // 0A32..0A33 7216 UNKNOWN, // 0A34 7217 GURMUKHI, // 0A35..0A36 7218 UNKNOWN, // 0A37 7219 GURMUKHI, // 0A38..0A39 7220 UNKNOWN, // 0A3A..0A3B 7221 GURMUKHI, // 0A3C 7222 UNKNOWN, // 0A3D 7223 GURMUKHI, // 0A3E..0A42 7224 UNKNOWN, // 0A43..0A46 7225 GURMUKHI, // 0A47..0A48 7226 UNKNOWN, // 0A49..0A4A 7227 GURMUKHI, // 0A4B..0A4D 7228 UNKNOWN, // 0A4E..0A50 7229 GURMUKHI, // 0A51 7230 UNKNOWN, // 0A52..0A58 7231 GURMUKHI, // 0A59..0A5C 7232 UNKNOWN, // 0A5D 7233 GURMUKHI, // 0A5E 7234 UNKNOWN, // 0A5F..0A65 7235 GURMUKHI, // 0A66..0A76 7236 UNKNOWN, // 0A77..0A80 7237 GUJARATI, // 0A81..0A83 7238 UNKNOWN, // 0A84 7239 GUJARATI, // 0A85..0A8D 7240 UNKNOWN, // 0A8E 7241 GUJARATI, // 0A8F..0A91 7242 UNKNOWN, // 0A92 7243 GUJARATI, // 0A93..0AA8 7244 UNKNOWN, // 0AA9 7245 GUJARATI, // 0AAA..0AB0 7246 UNKNOWN, // 0AB1 7247 GUJARATI, // 0AB2..0AB3 7248 UNKNOWN, // 0AB4 7249 GUJARATI, // 0AB5..0AB9 7250 UNKNOWN, // 0ABA..0ABB 7251 GUJARATI, // 0ABC..0AC5 7252 UNKNOWN, // 0AC6 7253 GUJARATI, // 0AC7..0AC9 7254 UNKNOWN, // 0ACA 7255 GUJARATI, // 0ACB..0ACD 7256 UNKNOWN, // 0ACE..0ACF 7257 GUJARATI, // 0AD0 7258 UNKNOWN, // 0AD1..0ADF 7259 GUJARATI, // 0AE0..0AE3 7260 UNKNOWN, 
// 0AE4..0AE5 7261 GUJARATI, // 0AE6..0AF1 7262 UNKNOWN, // 0AF2..0AF8 7263 GUJARATI, // 0AF9..0AFF 7264 UNKNOWN, // 0B00 7265 ORIYA, // 0B01..0B03 7266 UNKNOWN, // 0B04 7267 ORIYA, // 0B05..0B0C 7268 UNKNOWN, // 0B0D..0B0E 7269 ORIYA, // 0B0F..0B10 7270 UNKNOWN, // 0B11..0B12 7271 ORIYA, // 0B13..0B28 7272 UNKNOWN, // 0B29 7273 ORIYA, // 0B2A..0B30 7274 UNKNOWN, // 0B31 7275 ORIYA, // 0B32..0B33 7276 UNKNOWN, // 0B34 7277 ORIYA, // 0B35..0B39 7278 UNKNOWN, // 0B3A..0B3B 7279 ORIYA, // 0B3C..0B44 7280 UNKNOWN, // 0B45..0B46 7281 ORIYA, // 0B47..0B48 7282 UNKNOWN, // 0B49..0B4A 7283 ORIYA, // 0B4B..0B4D 7284 UNKNOWN, // 0B4E..0B54 7285 ORIYA, // 0B55..0B57 7286 UNKNOWN, // 0B58..0B5B 7287 ORIYA, // 0B5C..0B5D 7288 UNKNOWN, // 0B5E 7289 ORIYA, // 0B5F..0B63 7290 UNKNOWN, // 0B64..0B65 7291 ORIYA, // 0B66..0B77 7292 UNKNOWN, // 0B78..0B81 7293 TAMIL, // 0B82..0B83 7294 UNKNOWN, // 0B84 7295 TAMIL, // 0B85..0B8A 7296 UNKNOWN, // 0B8B..0B8D 7297 TAMIL, // 0B8E..0B90 7298 UNKNOWN, // 0B91 7299 TAMIL, // 0B92..0B95 7300 UNKNOWN, // 0B96..0B98 7301 TAMIL, // 0B99..0B9A 7302 UNKNOWN, // 0B9B 7303 TAMIL, // 0B9C 7304 UNKNOWN, // 0B9D 7305 TAMIL, // 0B9E..0B9F 7306 UNKNOWN, // 0BA0..0BA2 7307 TAMIL, // 0BA3..0BA4 7308 UNKNOWN, // 0BA5..0BA7 7309 TAMIL, // 0BA8..0BAA 7310 UNKNOWN, // 0BAB..0BAD 7311 TAMIL, // 0BAE..0BB9 7312 UNKNOWN, // 0BBA..0BBD 7313 TAMIL, // 0BBE..0BC2 7314 UNKNOWN, // 0BC3..0BC5 7315 TAMIL, // 0BC6..0BC8 7316 UNKNOWN, // 0BC9 7317 TAMIL, // 0BCA..0BCD 7318 UNKNOWN, // 0BCE..0BCF 7319 TAMIL, // 0BD0 7320 UNKNOWN, // 0BD1..0BD6 7321 TAMIL, // 0BD7 7322 UNKNOWN, // 0BD8..0BE5 7323 TAMIL, // 0BE6..0BFA 7324 UNKNOWN, // 0BFB..0BFF 7325 TELUGU, // 0C00..0C0C 7326 UNKNOWN, // 0C0D 7327 TELUGU, // 0C0E..0C10 7328 UNKNOWN, // 0C11 7329 TELUGU, // 0C12..0C28 7330 UNKNOWN, // 0C29 7331 TELUGU, // 0C2A..0C39 7332 UNKNOWN, // 0C3A..0C3B 7333 TELUGU, // 0C3C..0C44 7334 UNKNOWN, // 0C45 7335 TELUGU, // 0C46..0C48 7336 UNKNOWN, // 0C49 7337 TELUGU, // 0C4A..0C4D 7338 
UNKNOWN, // 0C4E..0C54 7339 TELUGU, // 0C55..0C56 7340 UNKNOWN, // 0C57 7341 TELUGU, // 0C58..0C5A 7342 UNKNOWN, // 0C5B..0C5C 7343 TELUGU, // 0C5D 7344 UNKNOWN, // 0C5E..0C5F 7345 TELUGU, // 0C60..0C63 7346 UNKNOWN, // 0C64..0C65 7347 TELUGU, // 0C66..0C6F 7348 UNKNOWN, // 0C70..0C76 7349 TELUGU, // 0C77..0C7F 7350 KANNADA, // 0C80..0C8C 7351 UNKNOWN, // 0C8D 7352 KANNADA, // 0C8E..0C90 7353 UNKNOWN, // 0C91 7354 KANNADA, // 0C92..0CA8 7355 UNKNOWN, // 0CA9 7356 KANNADA, // 0CAA..0CB3 7357 UNKNOWN, // 0CB4 7358 KANNADA, // 0CB5..0CB9 7359 UNKNOWN, // 0CBA..0CBB 7360 KANNADA, // 0CBC..0CC4 7361 UNKNOWN, // 0CC5 7362 KANNADA, // 0CC6..0CC8 7363 UNKNOWN, // 0CC9 7364 KANNADA, // 0CCA..0CCD 7365 UNKNOWN, // 0CCE..0CD4 7366 KANNADA, // 0CD5..0CD6 7367 UNKNOWN, // 0CD7..0CDC 7368 KANNADA, // 0CDD..0CDE 7369 UNKNOWN, // 0CDF 7370 KANNADA, // 0CE0..0CE3 7371 UNKNOWN, // 0CE4..0CE5 7372 KANNADA, // 0CE6..0CEF 7373 UNKNOWN, // 0CF0 7374 KANNADA, // 0CF1..0CF3 7375 UNKNOWN, // 0CF4..0CFF 7376 MALAYALAM, // 0D00..0D0C 7377 UNKNOWN, // 0D0D 7378 MALAYALAM, // 0D0E..0D10 7379 UNKNOWN, // 0D11 7380 MALAYALAM, // 0D12..0D44 7381 UNKNOWN, // 0D45 7382 MALAYALAM, // 0D46..0D48 7383 UNKNOWN, // 0D49 7384 MALAYALAM, // 0D4A..0D4F 7385 UNKNOWN, // 0D50..0D53 7386 MALAYALAM, // 0D54..0D63 7387 UNKNOWN, // 0D64..0D65 7388 MALAYALAM, // 0D66..0D7F 7389 UNKNOWN, // 0D80 7390 SINHALA, // 0D81..0D83 7391 UNKNOWN, // 0D84 7392 SINHALA, // 0D85..0D96 7393 UNKNOWN, // 0D97..0D99 7394 SINHALA, // 0D9A..0DB1 7395 UNKNOWN, // 0DB2 7396 SINHALA, // 0DB3..0DBB 7397 UNKNOWN, // 0DBC 7398 SINHALA, // 0DBD 7399 UNKNOWN, // 0DBE..0DBF 7400 SINHALA, // 0DC0..0DC6 7401 UNKNOWN, // 0DC7..0DC9 7402 SINHALA, // 0DCA 7403 UNKNOWN, // 0DCB..0DCE 7404 SINHALA, // 0DCF..0DD4 7405 UNKNOWN, // 0DD5 7406 SINHALA, // 0DD6 7407 UNKNOWN, // 0DD7 7408 SINHALA, // 0DD8..0DDF 7409 UNKNOWN, // 0DE0..0DE5 7410 SINHALA, // 0DE6..0DEF 7411 UNKNOWN, // 0DF0..0DF1 7412 SINHALA, // 0DF2..0DF4 7413 UNKNOWN, // 0DF5..0E00 7414 
THAI, // 0E01..0E3A 7415 UNKNOWN, // 0E3B..0E3E 7416 COMMON, // 0E3F 7417 THAI, // 0E40..0E5B 7418 UNKNOWN, // 0E5C..0E80 7419 LAO, // 0E81..0E82 7420 UNKNOWN, // 0E83 7421 LAO, // 0E84 7422 UNKNOWN, // 0E85 7423 LAO, // 0E86..0E8A 7424 UNKNOWN, // 0E8B 7425 LAO, // 0E8C..0EA3 7426 UNKNOWN, // 0EA4 7427 LAO, // 0EA5 7428 UNKNOWN, // 0EA6 7429 LAO, // 0EA7..0EBD 7430 UNKNOWN, // 0EBE..0EBF 7431 LAO, // 0EC0..0EC4 7432 UNKNOWN, // 0EC5 7433 LAO, // 0EC6 7434 UNKNOWN, // 0EC7 7435 LAO, // 0EC8..0ECE 7436 UNKNOWN, // 0ECF 7437 LAO, // 0ED0..0ED9 7438 UNKNOWN, // 0EDA..0EDB 7439 LAO, // 0EDC..0EDF 7440 UNKNOWN, // 0EE0..0EFF 7441 TIBETAN, // 0F00..0F47 7442 UNKNOWN, // 0F48 7443 TIBETAN, // 0F49..0F6C 7444 UNKNOWN, // 0F6D..0F70 7445 TIBETAN, // 0F71..0F97 7446 UNKNOWN, // 0F98 7447 TIBETAN, // 0F99..0FBC 7448 UNKNOWN, // 0FBD 7449 TIBETAN, // 0FBE..0FCC 7450 UNKNOWN, // 0FCD 7451 TIBETAN, // 0FCE..0FD4 7452 COMMON, // 0FD5..0FD8 7453 TIBETAN, // 0FD9..0FDA 7454 UNKNOWN, // 0FDB..0FFF 7455 MYANMAR, // 1000..109F 7456 GEORGIAN, // 10A0..10C5 7457 UNKNOWN, // 10C6 7458 GEORGIAN, // 10C7 7459 UNKNOWN, // 10C8..10CC 7460 GEORGIAN, // 10CD 7461 UNKNOWN, // 10CE..10CF 7462 GEORGIAN, // 10D0..10FA 7463 COMMON, // 10FB 7464 GEORGIAN, // 10FC..10FF 7465 HANGUL, // 1100..11FF 7466 ETHIOPIC, // 1200..1248 7467 UNKNOWN, // 1249 7468 ETHIOPIC, // 124A..124D 7469 UNKNOWN, // 124E..124F 7470 ETHIOPIC, // 1250..1256 7471 UNKNOWN, // 1257 7472 ETHIOPIC, // 1258 7473 UNKNOWN, // 1259 7474 ETHIOPIC, // 125A..125D 7475 UNKNOWN, // 125E..125F 7476 ETHIOPIC, // 1260..1288 7477 UNKNOWN, // 1289 7478 ETHIOPIC, // 128A..128D 7479 UNKNOWN, // 128E..128F 7480 ETHIOPIC, // 1290..12B0 7481 UNKNOWN, // 12B1 7482 ETHIOPIC, // 12B2..12B5 7483 UNKNOWN, // 12B6..12B7 7484 ETHIOPIC, // 12B8..12BE 7485 UNKNOWN, // 12BF 7486 ETHIOPIC, // 12C0 7487 UNKNOWN, // 12C1 7488 ETHIOPIC, // 12C2..12C5 7489 UNKNOWN, // 12C6..12C7 7490 ETHIOPIC, // 12C8..12D6 7491 UNKNOWN, // 12D7 7492 ETHIOPIC, // 12D8..1310 7493 
UNKNOWN, // 1311 7494 ETHIOPIC, // 1312..1315 7495 UNKNOWN, // 1316..1317 7496 ETHIOPIC, // 1318..135A 7497 UNKNOWN, // 135B..135C 7498 ETHIOPIC, // 135D..137C 7499 UNKNOWN, // 137D..137F 7500 ETHIOPIC, // 1380..1399 7501 UNKNOWN, // 139A..139F 7502 CHEROKEE, // 13A0..13F5 7503 UNKNOWN, // 13F6..13F7 7504 CHEROKEE, // 13F8..13FD 7505 UNKNOWN, // 13FE..13FF 7506 CANADIAN_ABORIGINAL, // 1400..167F 7507 OGHAM, // 1680..169C 7508 UNKNOWN, // 169D..169F 7509 RUNIC, // 16A0..16EA 7510 COMMON, // 16EB..16ED 7511 RUNIC, // 16EE..16F8 7512 UNKNOWN, // 16F9..16FF 7513 TAGALOG, // 1700..1715 7514 UNKNOWN, // 1716..171E 7515 TAGALOG, // 171F 7516 HANUNOO, // 1720..1734 7517 COMMON, // 1735..1736 7518 UNKNOWN, // 1737..173F 7519 BUHID, // 1740..1753 7520 UNKNOWN, // 1754..175F 7521 TAGBANWA, // 1760..176C 7522 UNKNOWN, // 176D 7523 TAGBANWA, // 176E..1770 7524 UNKNOWN, // 1771 7525 TAGBANWA, // 1772..1773 7526 UNKNOWN, // 1774..177F 7527 KHMER, // 1780..17DD 7528 UNKNOWN, // 17DE..17DF 7529 KHMER, // 17E0..17E9 7530 UNKNOWN, // 17EA..17EF 7531 KHMER, // 17F0..17F9 7532 UNKNOWN, // 17FA..17FF 7533 MONGOLIAN, // 1800..1801 7534 COMMON, // 1802..1803 7535 MONGOLIAN, // 1804 7536 COMMON, // 1805 7537 MONGOLIAN, // 1806..1819 7538 UNKNOWN, // 181A..181F 7539 MONGOLIAN, // 1820..1878 7540 UNKNOWN, // 1879..187F 7541 MONGOLIAN, // 1880..18AA 7542 UNKNOWN, // 18AB..18AF 7543 CANADIAN_ABORIGINAL, // 18B0..18F5 7544 UNKNOWN, // 18F6..18FF 7545 LIMBU, // 1900..191E 7546 UNKNOWN, // 191F 7547 LIMBU, // 1920..192B 7548 UNKNOWN, // 192C..192F 7549 LIMBU, // 1930..193B 7550 UNKNOWN, // 193C..193F 7551 LIMBU, // 1940 7552 UNKNOWN, // 1941..1943 7553 LIMBU, // 1944..194F 7554 TAI_LE, // 1950..196D 7555 UNKNOWN, // 196E..196F 7556 TAI_LE, // 1970..1974 7557 UNKNOWN, // 1975..197F 7558 NEW_TAI_LUE, // 1980..19AB 7559 UNKNOWN, // 19AC..19AF 7560 NEW_TAI_LUE, // 19B0..19C9 7561 UNKNOWN, // 19CA..19CF 7562 NEW_TAI_LUE, // 19D0..19DA 7563 UNKNOWN, // 19DB..19DD 7564 NEW_TAI_LUE, // 19DE..19DF 7565 
KHMER, // 19E0..19FF 7566 BUGINESE, // 1A00..1A1B 7567 UNKNOWN, // 1A1C..1A1D 7568 BUGINESE, // 1A1E..1A1F 7569 TAI_THAM, // 1A20..1A5E 7570 UNKNOWN, // 1A5F 7571 TAI_THAM, // 1A60..1A7C 7572 UNKNOWN, // 1A7D..1A7E 7573 TAI_THAM, // 1A7F..1A89 7574 UNKNOWN, // 1A8A..1A8F 7575 TAI_THAM, // 1A90..1A99 7576 UNKNOWN, // 1A9A..1A9F 7577 TAI_THAM, // 1AA0..1AAD 7578 UNKNOWN, // 1AAE..1AAF 7579 INHERITED, // 1AB0..1ACE 7580 UNKNOWN, // 1ACF..1AFF 7581 BALINESE, // 1B00..1B4C 7582 UNKNOWN, // 1B4D..1B4F 7583 BALINESE, // 1B50..1B7E 7584 UNKNOWN, // 1B7F 7585 SUNDANESE, // 1B80..1BBF 7586 BATAK, // 1BC0..1BF3 7587 UNKNOWN, // 1BF4..1BFB 7588 BATAK, // 1BFC..1BFF 7589 LEPCHA, // 1C00..1C37 7590 UNKNOWN, // 1C38..1C3A 7591 LEPCHA, // 1C3B..1C49 7592 UNKNOWN, // 1C4A..1C4C 7593 LEPCHA, // 1C4D..1C4F 7594 OL_CHIKI, // 1C50..1C7F 7595 CYRILLIC, // 1C80..1C88 7596 UNKNOWN, // 1C89..1C8F 7597 GEORGIAN, // 1C90..1CBA 7598 UNKNOWN, // 1CBB..1CBC 7599 GEORGIAN, // 1CBD..1CBF 7600 SUNDANESE, // 1CC0..1CC7 7601 UNKNOWN, // 1CC8..1CCF 7602 INHERITED, // 1CD0..1CD2 7603 COMMON, // 1CD3 7604 INHERITED, // 1CD4..1CE0 7605 COMMON, // 1CE1 7606 INHERITED, // 1CE2..1CE8 7607 COMMON, // 1CE9..1CEC 7608 INHERITED, // 1CED 7609 COMMON, // 1CEE..1CF3 7610 INHERITED, // 1CF4 7611 COMMON, // 1CF5..1CF7 7612 INHERITED, // 1CF8..1CF9 7613 COMMON, // 1CFA 7614 UNKNOWN, // 1CFB..1CFF 7615 LATIN, // 1D00..1D25 7616 GREEK, // 1D26..1D2A 7617 CYRILLIC, // 1D2B 7618 LATIN, // 1D2C..1D5C 7619 GREEK, // 1D5D..1D61 7620 LATIN, // 1D62..1D65 7621 GREEK, // 1D66..1D6A 7622 LATIN, // 1D6B..1D77 7623 CYRILLIC, // 1D78 7624 LATIN, // 1D79..1DBE 7625 GREEK, // 1DBF 7626 INHERITED, // 1DC0..1DFF 7627 LATIN, // 1E00..1EFF 7628 GREEK, // 1F00..1F15 7629 UNKNOWN, // 1F16..1F17 7630 GREEK, // 1F18..1F1D 7631 UNKNOWN, // 1F1E..1F1F 7632 GREEK, // 1F20..1F45 7633 UNKNOWN, // 1F46..1F47 7634 GREEK, // 1F48..1F4D 7635 UNKNOWN, // 1F4E..1F4F 7636 GREEK, // 1F50..1F57 7637 UNKNOWN, // 1F58 7638 GREEK, // 1F59 7639 UNKNOWN, // 
1F5A 7640 GREEK, // 1F5B 7641 UNKNOWN, // 1F5C 7642 GREEK, // 1F5D 7643 UNKNOWN, // 1F5E 7644 GREEK, // 1F5F..1F7D 7645 UNKNOWN, // 1F7E..1F7F 7646 GREEK, // 1F80..1FB4 7647 UNKNOWN, // 1FB5 7648 GREEK, // 1FB6..1FC4 7649 UNKNOWN, // 1FC5 7650 GREEK, // 1FC6..1FD3 7651 UNKNOWN, // 1FD4..1FD5 7652 GREEK, // 1FD6..1FDB 7653 UNKNOWN, // 1FDC 7654 GREEK, // 1FDD..1FEF 7655 UNKNOWN, // 1FF0..1FF1 7656 GREEK, // 1FF2..1FF4 7657 UNKNOWN, // 1FF5 7658 GREEK, // 1FF6..1FFE 7659 UNKNOWN, // 1FFF 7660 COMMON, // 2000..200B 7661 INHERITED, // 200C..200D 7662 COMMON, // 200E..2064 7663 UNKNOWN, // 2065 7664 COMMON, // 2066..2070 7665 LATIN, // 2071 7666 UNKNOWN, // 2072..2073 7667 COMMON, // 2074..207E 7668 LATIN, // 207F 7669 COMMON, // 2080..208E 7670 UNKNOWN, // 208F 7671 LATIN, // 2090..209C 7672 UNKNOWN, // 209D..209F 7673 COMMON, // 20A0..20C0 7674 UNKNOWN, // 20C1..20CF 7675 INHERITED, // 20D0..20F0 7676 UNKNOWN, // 20F1..20FF 7677 COMMON, // 2100..2125 7678 GREEK, // 2126 7679 COMMON, // 2127..2129 7680 LATIN, // 212A..212B 7681 COMMON, // 212C..2131 7682 LATIN, // 2132 7683 COMMON, // 2133..214D 7684 LATIN, // 214E 7685 COMMON, // 214F..215F 7686 LATIN, // 2160..2188 7687 COMMON, // 2189..218B 7688 UNKNOWN, // 218C..218F 7689 COMMON, // 2190..2426 7690 UNKNOWN, // 2427..243F 7691 COMMON, // 2440..244A 7692 UNKNOWN, // 244B..245F 7693 COMMON, // 2460..27FF 7694 BRAILLE, // 2800..28FF 7695 COMMON, // 2900..2B73 7696 UNKNOWN, // 2B74..2B75 7697 COMMON, // 2B76..2B95 7698 UNKNOWN, // 2B96 7699 COMMON, // 2B97..2BFF 7700 GLAGOLITIC, // 2C00..2C5F 7701 LATIN, // 2C60..2C7F 7702 COPTIC, // 2C80..2CF3 7703 UNKNOWN, // 2CF4..2CF8 7704 COPTIC, // 2CF9..2CFF 7705 GEORGIAN, // 2D00..2D25 7706 UNKNOWN, // 2D26 7707 GEORGIAN, // 2D27 7708 UNKNOWN, // 2D28..2D2C 7709 GEORGIAN, // 2D2D 7710 UNKNOWN, // 2D2E..2D2F 7711 TIFINAGH, // 2D30..2D67 7712 UNKNOWN, // 2D68..2D6E 7713 TIFINAGH, // 2D6F..2D70 7714 UNKNOWN, // 2D71..2D7E 7715 TIFINAGH, // 2D7F 7716 ETHIOPIC, // 2D80..2D96 7717 
UNKNOWN, // 2D97..2D9F 7718 ETHIOPIC, // 2DA0..2DA6 7719 UNKNOWN, // 2DA7 7720 ETHIOPIC, // 2DA8..2DAE 7721 UNKNOWN, // 2DAF 7722 ETHIOPIC, // 2DB0..2DB6 7723 UNKNOWN, // 2DB7 7724 ETHIOPIC, // 2DB8..2DBE 7725 UNKNOWN, // 2DBF 7726 ETHIOPIC, // 2DC0..2DC6 7727 UNKNOWN, // 2DC7 7728 ETHIOPIC, // 2DC8..2DCE 7729 UNKNOWN, // 2DCF 7730 ETHIOPIC, // 2DD0..2DD6 7731 UNKNOWN, // 2DD7 7732 ETHIOPIC, // 2DD8..2DDE 7733 UNKNOWN, // 2DDF 7734 CYRILLIC, // 2DE0..2DFF 7735 COMMON, // 2E00..2E5D 7736 UNKNOWN, // 2E5E..2E7F 7737 HAN, // 2E80..2E99 7738 UNKNOWN, // 2E9A 7739 HAN, // 2E9B..2EF3 7740 UNKNOWN, // 2EF4..2EFF 7741 HAN, // 2F00..2FD5 7742 UNKNOWN, // 2FD6..2FEF 7743 COMMON, // 2FF0..3004 7744 HAN, // 3005 7745 COMMON, // 3006 7746 HAN, // 3007 7747 COMMON, // 3008..3020 7748 HAN, // 3021..3029 7749 INHERITED, // 302A..302D 7750 HANGUL, // 302E..302F 7751 COMMON, // 3030..3037 7752 HAN, // 3038..303B 7753 COMMON, // 303C..303F 7754 UNKNOWN, // 3040 7755 HIRAGANA, // 3041..3096 7756 UNKNOWN, // 3097..3098 7757 INHERITED, // 3099..309A 7758 COMMON, // 309B..309C 7759 HIRAGANA, // 309D..309F 7760 COMMON, // 30A0 7761 KATAKANA, // 30A1..30FA 7762 COMMON, // 30FB..30FC 7763 KATAKANA, // 30FD..30FF 7764 UNKNOWN, // 3100..3104 7765 BOPOMOFO, // 3105..312F 7766 UNKNOWN, // 3130 7767 HANGUL, // 3131..318E 7768 UNKNOWN, // 318F 7769 COMMON, // 3190..319F 7770 BOPOMOFO, // 31A0..31BF 7771 COMMON, // 31C0..31E3 7772 UNKNOWN, // 31E4..31EE 7773 COMMON, // 31EF 7774 KATAKANA, // 31F0..31FF 7775 HANGUL, // 3200..321E 7776 UNKNOWN, // 321F 7777 COMMON, // 3220..325F 7778 HANGUL, // 3260..327E 7779 COMMON, // 327F..32CF 7780 KATAKANA, // 32D0..32FE 7781 COMMON, // 32FF 7782 KATAKANA, // 3300..3357 7783 COMMON, // 3358..33FF 7784 HAN, // 3400..4DBF 7785 COMMON, // 4DC0..4DFF 7786 HAN, // 4E00..9FFF 7787 YI, // A000..A48C 7788 UNKNOWN, // A48D..A48F 7789 YI, // A490..A4C6 7790 UNKNOWN, // A4C7..A4CF 7791 LISU, // A4D0..A4FF 7792 VAI, // A500..A62B 7793 UNKNOWN, // A62C..A63F 7794 CYRILLIC, 
// A640..A69F 7795 BAMUM, // A6A0..A6F7 7796 UNKNOWN, // A6F8..A6FF 7797 COMMON, // A700..A721 7798 LATIN, // A722..A787 7799 COMMON, // A788..A78A 7800 LATIN, // A78B..A7CA 7801 UNKNOWN, // A7CB..A7CF 7802 LATIN, // A7D0..A7D1 7803 UNKNOWN, // A7D2 7804 LATIN, // A7D3 7805 UNKNOWN, // A7D4 7806 LATIN, // A7D5..A7D9 7807 UNKNOWN, // A7DA..A7F1 7808 LATIN, // A7F2..A7FF 7809 SYLOTI_NAGRI, // A800..A82C 7810 UNKNOWN, // A82D..A82F 7811 COMMON, // A830..A839 7812 UNKNOWN, // A83A..A83F 7813 PHAGS_PA, // A840..A877 7814 UNKNOWN, // A878..A87F 7815 SAURASHTRA, // A880..A8C5 7816 UNKNOWN, // A8C6..A8CD 7817 SAURASHTRA, // A8CE..A8D9 7818 UNKNOWN, // A8DA..A8DF 7819 DEVANAGARI, // A8E0..A8FF 7820 KAYAH_LI, // A900..A92D 7821 COMMON, // A92E 7822 KAYAH_LI, // A92F 7823 REJANG, // A930..A953 7824 UNKNOWN, // A954..A95E 7825 REJANG, // A95F 7826 HANGUL, // A960..A97C 7827 UNKNOWN, // A97D..A97F 7828 JAVANESE, // A980..A9CD 7829 UNKNOWN, // A9CE 7830 COMMON, // A9CF 7831 JAVANESE, // A9D0..A9D9 7832 UNKNOWN, // A9DA..A9DD 7833 JAVANESE, // A9DE..A9DF 7834 MYANMAR, // A9E0..A9FE 7835 UNKNOWN, // A9FF 7836 CHAM, // AA00..AA36 7837 UNKNOWN, // AA37..AA3F 7838 CHAM, // AA40..AA4D 7839 UNKNOWN, // AA4E..AA4F 7840 CHAM, // AA50..AA59 7841 UNKNOWN, // AA5A..AA5B 7842 CHAM, // AA5C..AA5F 7843 MYANMAR, // AA60..AA7F 7844 TAI_VIET, // AA80..AAC2 7845 UNKNOWN, // AAC3..AADA 7846 TAI_VIET, // AADB..AADF 7847 MEETEI_MAYEK, // AAE0..AAF6 7848 UNKNOWN, // AAF7..AB00 7849 ETHIOPIC, // AB01..AB06 7850 UNKNOWN, // AB07..AB08 7851 ETHIOPIC, // AB09..AB0E 7852 UNKNOWN, // AB0F..AB10 7853 ETHIOPIC, // AB11..AB16 7854 UNKNOWN, // AB17..AB1F 7855 ETHIOPIC, // AB20..AB26 7856 UNKNOWN, // AB27 7857 ETHIOPIC, // AB28..AB2E 7858 UNKNOWN, // AB2F 7859 LATIN, // AB30..AB5A 7860 COMMON, // AB5B 7861 LATIN, // AB5C..AB64 7862 GREEK, // AB65 7863 LATIN, // AB66..AB69 7864 COMMON, // AB6A..AB6B 7865 UNKNOWN, // AB6C..AB6F 7866 CHEROKEE, // AB70..ABBF 7867 MEETEI_MAYEK, // ABC0..ABED 7868 UNKNOWN, // 
ABEE..ABEF 7869 MEETEI_MAYEK, // ABF0..ABF9 7870 UNKNOWN, // ABFA..ABFF 7871 HANGUL, // AC00..D7A3 7872 UNKNOWN, // D7A4..D7AF 7873 HANGUL, // D7B0..D7C6 7874 UNKNOWN, // D7C7..D7CA 7875 HANGUL, // D7CB..D7FB 7876 UNKNOWN, // D7FC..F8FF 7877 HAN, // F900..FA6D 7878 UNKNOWN, // FA6E..FA6F 7879 HAN, // FA70..FAD9 7880 UNKNOWN, // FADA..FAFF 7881 LATIN, // FB00..FB06 7882 UNKNOWN, // FB07..FB12 7883 ARMENIAN, // FB13..FB17 7884 UNKNOWN, // FB18..FB1C 7885 HEBREW, // FB1D..FB36 7886 UNKNOWN, // FB37 7887 HEBREW, // FB38..FB3C 7888 UNKNOWN, // FB3D 7889 HEBREW, // FB3E 7890 UNKNOWN, // FB3F 7891 HEBREW, // FB40..FB41 7892 UNKNOWN, // FB42 7893 HEBREW, // FB43..FB44 7894 UNKNOWN, // FB45 7895 HEBREW, // FB46..FB4F 7896 ARABIC, // FB50..FBC2 7897 UNKNOWN, // FBC3..FBD2 7898 ARABIC, // FBD3..FD3D 7899 COMMON, // FD3E..FD3F 7900 ARABIC, // FD40..FD8F 7901 UNKNOWN, // FD90..FD91 7902 ARABIC, // FD92..FDC7 7903 UNKNOWN, // FDC8..FDCE 7904 ARABIC, // FDCF 7905 UNKNOWN, // FDD0..FDEF 7906 ARABIC, // FDF0..FDFF 7907 INHERITED, // FE00..FE0F 7908 COMMON, // FE10..FE19 7909 UNKNOWN, // FE1A..FE1F 7910 INHERITED, // FE20..FE2D 7911 CYRILLIC, // FE2E..FE2F 7912 COMMON, // FE30..FE52 7913 UNKNOWN, // FE53 7914 COMMON, // FE54..FE66 7915 UNKNOWN, // FE67 7916 COMMON, // FE68..FE6B 7917 UNKNOWN, // FE6C..FE6F 7918 ARABIC, // FE70..FE74 7919 UNKNOWN, // FE75 7920 ARABIC, // FE76..FEFC 7921 UNKNOWN, // FEFD..FEFE 7922 COMMON, // FEFF 7923 UNKNOWN, // FF00 7924 COMMON, // FF01..FF20 7925 LATIN, // FF21..FF3A 7926 COMMON, // FF3B..FF40 7927 LATIN, // FF41..FF5A 7928 COMMON, // FF5B..FF65 7929 KATAKANA, // FF66..FF6F 7930 COMMON, // FF70 7931 KATAKANA, // FF71..FF9D 7932 COMMON, // FF9E..FF9F 7933 HANGUL, // FFA0..FFBE 7934 UNKNOWN, // FFBF..FFC1 7935 HANGUL, // FFC2..FFC7 7936 UNKNOWN, // FFC8..FFC9 7937 HANGUL, // FFCA..FFCF 7938 UNKNOWN, // FFD0..FFD1 7939 HANGUL, // FFD2..FFD7 7940 UNKNOWN, // FFD8..FFD9 7941 HANGUL, // FFDA..FFDC 7942 UNKNOWN, // FFDD..FFDF 7943 COMMON, // FFE0..FFE6 
7944 UNKNOWN, // FFE7 7945 COMMON, // FFE8..FFEE 7946 UNKNOWN, // FFEF..FFF8 7947 COMMON, // FFF9..FFFD 7948 UNKNOWN, // FFFE..FFFF 7949 LINEAR_B, // 10000..1000B 7950 UNKNOWN, // 1000C 7951 LINEAR_B, // 1000D..10026 7952 UNKNOWN, // 10027 7953 LINEAR_B, // 10028..1003A 7954 UNKNOWN, // 1003B 7955 LINEAR_B, // 1003C..1003D 7956 UNKNOWN, // 1003E 7957 LINEAR_B, // 1003F..1004D 7958 UNKNOWN, // 1004E..1004F 7959 LINEAR_B, // 10050..1005D 7960 UNKNOWN, // 1005E..1007F 7961 LINEAR_B, // 10080..100FA 7962 UNKNOWN, // 100FB..100FF 7963 COMMON, // 10100..10102 7964 UNKNOWN, // 10103..10106 7965 COMMON, // 10107..10133 7966 UNKNOWN, // 10134..10136 7967 COMMON, // 10137..1013F 7968 GREEK, // 10140..1018E 7969 UNKNOWN, // 1018F 7970 COMMON, // 10190..1019C 7971 UNKNOWN, // 1019D..1019F 7972 GREEK, // 101A0 7973 UNKNOWN, // 101A1..101CF 7974 COMMON, // 101D0..101FC 7975 INHERITED, // 101FD 7976 UNKNOWN, // 101FE..1027F 7977 LYCIAN, // 10280..1029C 7978 UNKNOWN, // 1029D..1029F 7979 CARIAN, // 102A0..102D0 7980 UNKNOWN, // 102D1..102DF 7981 INHERITED, // 102E0 7982 COMMON, // 102E1..102FB 7983 UNKNOWN, // 102FC..102FF 7984 OLD_ITALIC, // 10300..10323 7985 UNKNOWN, // 10324..1032C 7986 OLD_ITALIC, // 1032D..1032F 7987 GOTHIC, // 10330..1034A 7988 UNKNOWN, // 1034B..1034F 7989 OLD_PERMIC, // 10350..1037A 7990 UNKNOWN, // 1037B..1037F 7991 UGARITIC, // 10380..1039D 7992 UNKNOWN, // 1039E 7993 UGARITIC, // 1039F 7994 OLD_PERSIAN, // 103A0..103C3 7995 UNKNOWN, // 103C4..103C7 7996 OLD_PERSIAN, // 103C8..103D5 7997 UNKNOWN, // 103D6..103FF 7998 DESERET, // 10400..1044F 7999 SHAVIAN, // 10450..1047F 8000 OSMANYA, // 10480..1049D 8001 UNKNOWN, // 1049E..1049F 8002 OSMANYA, // 104A0..104A9 8003 UNKNOWN, // 104AA..104AF 8004 OSAGE, // 104B0..104D3 8005 UNKNOWN, // 104D4..104D7 8006 OSAGE, // 104D8..104FB 8007 UNKNOWN, // 104FC..104FF 8008 ELBASAN, // 10500..10527 8009 UNKNOWN, // 10528..1052F 8010 CAUCASIAN_ALBANIAN, // 10530..10563 8011 UNKNOWN, // 10564..1056E 8012 
CAUCASIAN_ALBANIAN, // 1056F 8013 VITHKUQI, // 10570..1057A 8014 UNKNOWN, // 1057B 8015 VITHKUQI, // 1057C..1058A 8016 UNKNOWN, // 1058B 8017 VITHKUQI, // 1058C..10592 8018 UNKNOWN, // 10593 8019 VITHKUQI, // 10594..10595 8020 UNKNOWN, // 10596 8021 VITHKUQI, // 10597..105A1 8022 UNKNOWN, // 105A2 8023 VITHKUQI, // 105A3..105B1 8024 UNKNOWN, // 105B2 8025 VITHKUQI, // 105B3..105B9 8026 UNKNOWN, // 105BA 8027 VITHKUQI, // 105BB..105BC 8028 UNKNOWN, // 105BD..105FF 8029 LINEAR_A, // 10600..10736 8030 UNKNOWN, // 10737..1073F 8031 LINEAR_A, // 10740..10755 8032 UNKNOWN, // 10756..1075F 8033 LINEAR_A, // 10760..10767 8034 UNKNOWN, // 10768..1077F 8035 LATIN, // 10780..10785 8036 UNKNOWN, // 10786 8037 LATIN, // 10787..107B0 8038 UNKNOWN, // 107B1 8039 LATIN, // 107B2..107BA 8040 UNKNOWN, // 107BB..107FF 8041 CYPRIOT, // 10800..10805 8042 UNKNOWN, // 10806..10807 8043 CYPRIOT, // 10808 8044 UNKNOWN, // 10809 8045 CYPRIOT, // 1080A..10835 8046 UNKNOWN, // 10836 8047 CYPRIOT, // 10837..10838 8048 UNKNOWN, // 10839..1083B 8049 CYPRIOT, // 1083C 8050 UNKNOWN, // 1083D..1083E 8051 CYPRIOT, // 1083F 8052 IMPERIAL_ARAMAIC, // 10840..10855 8053 UNKNOWN, // 10856 8054 IMPERIAL_ARAMAIC, // 10857..1085F 8055 PALMYRENE, // 10860..1087F 8056 NABATAEAN, // 10880..1089E 8057 UNKNOWN, // 1089F..108A6 8058 NABATAEAN, // 108A7..108AF 8059 UNKNOWN, // 108B0..108DF 8060 HATRAN, // 108E0..108F2 8061 UNKNOWN, // 108F3 8062 HATRAN, // 108F4..108F5 8063 UNKNOWN, // 108F6..108FA 8064 HATRAN, // 108FB..108FF 8065 PHOENICIAN, // 10900..1091B 8066 UNKNOWN, // 1091C..1091E 8067 PHOENICIAN, // 1091F 8068 LYDIAN, // 10920..10939 8069 UNKNOWN, // 1093A..1093E 8070 LYDIAN, // 1093F 8071 UNKNOWN, // 10940..1097F 8072 MEROITIC_HIEROGLYPHS, // 10980..1099F 8073 MEROITIC_CURSIVE, // 109A0..109B7 8074 UNKNOWN, // 109B8..109BB 8075 MEROITIC_CURSIVE, // 109BC..109CF 8076 UNKNOWN, // 109D0..109D1 8077 MEROITIC_CURSIVE, // 109D2..109FF 8078 KHAROSHTHI, // 10A00..10A03 8079 UNKNOWN, // 10A04 8080 KHAROSHTHI, // 
10A05..10A06 8081 UNKNOWN, // 10A07..10A0B 8082 KHAROSHTHI, // 10A0C..10A13 8083 UNKNOWN, // 10A14 8084 KHAROSHTHI, // 10A15..10A17 8085 UNKNOWN, // 10A18 8086 KHAROSHTHI, // 10A19..10A35 8087 UNKNOWN, // 10A36..10A37 8088 KHAROSHTHI, // 10A38..10A3A 8089 UNKNOWN, // 10A3B..10A3E 8090 KHAROSHTHI, // 10A3F..10A48 8091 UNKNOWN, // 10A49..10A4F 8092 KHAROSHTHI, // 10A50..10A58 8093 UNKNOWN, // 10A59..10A5F 8094 OLD_SOUTH_ARABIAN, // 10A60..10A7F 8095 OLD_NORTH_ARABIAN, // 10A80..10A9F 8096 UNKNOWN, // 10AA0..10ABF 8097 MANICHAEAN, // 10AC0..10AE6 8098 UNKNOWN, // 10AE7..10AEA 8099 MANICHAEAN, // 10AEB..10AF6 8100 UNKNOWN, // 10AF7..10AFF 8101 AVESTAN, // 10B00..10B35 8102 UNKNOWN, // 10B36..10B38 8103 AVESTAN, // 10B39..10B3F 8104 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55 8105 UNKNOWN, // 10B56..10B57 8106 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F 8107 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72 8108 UNKNOWN, // 10B73..10B77 8109 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F 8110 PSALTER_PAHLAVI, // 10B80..10B91 8111 UNKNOWN, // 10B92..10B98 8112 PSALTER_PAHLAVI, // 10B99..10B9C 8113 UNKNOWN, // 10B9D..10BA8 8114 PSALTER_PAHLAVI, // 10BA9..10BAF 8115 UNKNOWN, // 10BB0..10BFF 8116 OLD_TURKIC, // 10C00..10C48 8117 UNKNOWN, // 10C49..10C7F 8118 OLD_HUNGARIAN, // 10C80..10CB2 8119 UNKNOWN, // 10CB3..10CBF 8120 OLD_HUNGARIAN, // 10CC0..10CF2 8121 UNKNOWN, // 10CF3..10CF9 8122 OLD_HUNGARIAN, // 10CFA..10CFF 8123 HANIFI_ROHINGYA, // 10D00..10D27 8124 UNKNOWN, // 10D28..10D2F 8125 HANIFI_ROHINGYA, // 10D30..10D39 8126 UNKNOWN, // 10D3A..10E5F 8127 ARABIC, // 10E60..10E7E 8128 UNKNOWN, // 10E7F 8129 YEZIDI, // 10E80..10EA9 8130 UNKNOWN, // 10EAA 8131 YEZIDI, // 10EAB..10EAD 8132 UNKNOWN, // 10EAE..10EAF 8133 YEZIDI, // 10EB0..10EB1 8134 UNKNOWN, // 10EB2..10EFC 8135 ARABIC, // 10EFD..10EFF 8136 OLD_SOGDIAN, // 10F00..10F27 8137 UNKNOWN, // 10F28..10F2F 8138 SOGDIAN, // 10F30..10F59 8139 UNKNOWN, // 10F5A..10F6F 8140 OLD_UYGHUR, // 10F70..10F89 8141 UNKNOWN, // 10F8A..10FAF 8142 CHORASMIAN, // 
10FB0..10FCB 8143 UNKNOWN, // 10FCC..10FDF 8144 ELYMAIC, // 10FE0..10FF6 8145 UNKNOWN, // 10FF7..10FFF 8146 BRAHMI, // 11000..1104D 8147 UNKNOWN, // 1104E..11051 8148 BRAHMI, // 11052..11075 8149 UNKNOWN, // 11076..1107E 8150 BRAHMI, // 1107F 8151 KAITHI, // 11080..110C2 8152 UNKNOWN, // 110C3..110CC 8153 KAITHI, // 110CD 8154 UNKNOWN, // 110CE..110CF 8155 SORA_SOMPENG, // 110D0..110E8 8156 UNKNOWN, // 110E9..110EF 8157 SORA_SOMPENG, // 110F0..110F9 8158 UNKNOWN, // 110FA..110FF 8159 CHAKMA, // 11100..11134 8160 UNKNOWN, // 11135 8161 CHAKMA, // 11136..11147 8162 UNKNOWN, // 11148..1114F 8163 MAHAJANI, // 11150..11176 8164 UNKNOWN, // 11177..1117F 8165 SHARADA, // 11180..111DF 8166 UNKNOWN, // 111E0 8167 SINHALA, // 111E1..111F4 8168 UNKNOWN, // 111F5..111FF 8169 KHOJKI, // 11200..11211 8170 UNKNOWN, // 11212 8171 KHOJKI, // 11213..11241 8172 UNKNOWN, // 11242..1127F 8173 MULTANI, // 11280..11286 8174 UNKNOWN, // 11287 8175 MULTANI, // 11288 8176 UNKNOWN, // 11289 8177 MULTANI, // 1128A..1128D 8178 UNKNOWN, // 1128E 8179 MULTANI, // 1128F..1129D 8180 UNKNOWN, // 1129E 8181 MULTANI, // 1129F..112A9 8182 UNKNOWN, // 112AA..112AF 8183 KHUDAWADI, // 112B0..112EA 8184 UNKNOWN, // 112EB..112EF 8185 KHUDAWADI, // 112F0..112F9 8186 UNKNOWN, // 112FA..112FF 8187 GRANTHA, // 11300..11303 8188 UNKNOWN, // 11304 8189 GRANTHA, // 11305..1130C 8190 UNKNOWN, // 1130D..1130E 8191 GRANTHA, // 1130F..11310 8192 UNKNOWN, // 11311..11312 8193 GRANTHA, // 11313..11328 8194 UNKNOWN, // 11329 8195 GRANTHA, // 1132A..11330 8196 UNKNOWN, // 11331 8197 GRANTHA, // 11332..11333 8198 UNKNOWN, // 11334 8199 GRANTHA, // 11335..11339 8200 UNKNOWN, // 1133A 8201 INHERITED, // 1133B 8202 GRANTHA, // 1133C..11344 8203 UNKNOWN, // 11345..11346 8204 GRANTHA, // 11347..11348 8205 UNKNOWN, // 11349..1134A 8206 GRANTHA, // 1134B..1134D 8207 UNKNOWN, // 1134E..1134F 8208 GRANTHA, // 11350 8209 UNKNOWN, // 11351..11356 8210 GRANTHA, // 11357 8211 UNKNOWN, // 11358..1135C 8212 GRANTHA, // 1135D..11363 8213 
UNKNOWN, // 11364..11365 8214 GRANTHA, // 11366..1136C 8215 UNKNOWN, // 1136D..1136F 8216 GRANTHA, // 11370..11374 8217 UNKNOWN, // 11375..113FF 8218 NEWA, // 11400..1145B 8219 UNKNOWN, // 1145C 8220 NEWA, // 1145D..11461 8221 UNKNOWN, // 11462..1147F 8222 TIRHUTA, // 11480..114C7 8223 UNKNOWN, // 114C8..114CF 8224 TIRHUTA, // 114D0..114D9 8225 UNKNOWN, // 114DA..1157F 8226 SIDDHAM, // 11580..115B5 8227 UNKNOWN, // 115B6..115B7 8228 SIDDHAM, // 115B8..115DD 8229 UNKNOWN, // 115DE..115FF 8230 MODI, // 11600..11644 8231 UNKNOWN, // 11645..1164F 8232 MODI, // 11650..11659 8233 UNKNOWN, // 1165A..1165F 8234 MONGOLIAN, // 11660..1166C 8235 UNKNOWN, // 1166D..1167F 8236 TAKRI, // 11680..116B9 8237 UNKNOWN, // 116BA..116BF 8238 TAKRI, // 116C0..116C9 8239 UNKNOWN, // 116CA..116FF 8240 AHOM, // 11700..1171A 8241 UNKNOWN, // 1171B..1171C 8242 AHOM, // 1171D..1172B 8243 UNKNOWN, // 1172C..1172F 8244 AHOM, // 11730..11746 8245 UNKNOWN, // 11747..117FF 8246 DOGRA, // 11800..1183B 8247 UNKNOWN, // 1183C..1189F 8248 WARANG_CITI, // 118A0..118F2 8249 UNKNOWN, // 118F3..118FE 8250 WARANG_CITI, // 118FF 8251 DIVES_AKURU, // 11900..11906 8252 UNKNOWN, // 11907..11908 8253 DIVES_AKURU, // 11909 8254 UNKNOWN, // 1190A..1190B 8255 DIVES_AKURU, // 1190C..11913 8256 UNKNOWN, // 11914 8257 DIVES_AKURU, // 11915..11916 8258 UNKNOWN, // 11917 8259 DIVES_AKURU, // 11918..11935 8260 UNKNOWN, // 11936 8261 DIVES_AKURU, // 11937..11938 8262 UNKNOWN, // 11939..1193A 8263 DIVES_AKURU, // 1193B..11946 8264 UNKNOWN, // 11947..1194F 8265 DIVES_AKURU, // 11950..11959 8266 UNKNOWN, // 1195A..1199F 8267 NANDINAGARI, // 119A0..119A7 8268 UNKNOWN, // 119A8..119A9 8269 NANDINAGARI, // 119AA..119D7 8270 UNKNOWN, // 119D8..119D9 8271 NANDINAGARI, // 119DA..119E4 8272 UNKNOWN, // 119E5..119FF 8273 ZANABAZAR_SQUARE, // 11A00..11A47 8274 UNKNOWN, // 11A48..11A4F 8275 SOYOMBO, // 11A50..11AA2 8276 UNKNOWN, // 11AA3..11AAF 8277 CANADIAN_ABORIGINAL, // 11AB0..11ABF 8278 PAU_CIN_HAU, // 11AC0..11AF8 8279 UNKNOWN, 
// 11AF9..11AFF 8280 DEVANAGARI, // 11B00..11B09 8281 UNKNOWN, // 11B0A..11BFF 8282 BHAIKSUKI, // 11C00..11C08 8283 UNKNOWN, // 11C09 8284 BHAIKSUKI, // 11C0A..11C36 8285 UNKNOWN, // 11C37 8286 BHAIKSUKI, // 11C38..11C45 8287 UNKNOWN, // 11C46..11C4F 8288 BHAIKSUKI, // 11C50..11C6C 8289 UNKNOWN, // 11C6D..11C6F 8290 MARCHEN, // 11C70..11C8F 8291 UNKNOWN, // 11C90..11C91 8292 MARCHEN, // 11C92..11CA7 8293 UNKNOWN, // 11CA8 8294 MARCHEN, // 11CA9..11CB6 8295 UNKNOWN, // 11CB7..11CFF 8296 MASARAM_GONDI, // 11D00..11D06 8297 UNKNOWN, // 11D07 8298 MASARAM_GONDI, // 11D08..11D09 8299 UNKNOWN, // 11D0A 8300 MASARAM_GONDI, // 11D0B..11D36 8301 UNKNOWN, // 11D37..11D39 8302 MASARAM_GONDI, // 11D3A 8303 UNKNOWN, // 11D3B 8304 MASARAM_GONDI, // 11D3C..11D3D 8305 UNKNOWN, // 11D3E 8306 MASARAM_GONDI, // 11D3F..11D47 8307 UNKNOWN, // 11D48..11D4F 8308 MASARAM_GONDI, // 11D50..11D59 8309 UNKNOWN, // 11D5A..11D5F 8310 GUNJALA_GONDI, // 11D60..11D65 8311 UNKNOWN, // 11D66 8312 GUNJALA_GONDI, // 11D67..11D68 8313 UNKNOWN, // 11D69 8314 GUNJALA_GONDI, // 11D6A..11D8E 8315 UNKNOWN, // 11D8F 8316 GUNJALA_GONDI, // 11D90..11D91 8317 UNKNOWN, // 11D92 8318 GUNJALA_GONDI, // 11D93..11D98 8319 UNKNOWN, // 11D99..11D9F 8320 GUNJALA_GONDI, // 11DA0..11DA9 8321 UNKNOWN, // 11DAA..11EDF 8322 MAKASAR, // 11EE0..11EF8 8323 UNKNOWN, // 11EF9..11EFF 8324 KAWI, // 11F00..11F10 8325 UNKNOWN, // 11F11 8326 KAWI, // 11F12..11F3A 8327 UNKNOWN, // 11F3B..11F3D 8328 KAWI, // 11F3E..11F59 8329 UNKNOWN, // 11F5A..11FAF 8330 LISU, // 11FB0 8331 UNKNOWN, // 11FB1..11FBF 8332 TAMIL, // 11FC0..11FF1 8333 UNKNOWN, // 11FF2..11FFE 8334 TAMIL, // 11FFF 8335 CUNEIFORM, // 12000..12399 8336 UNKNOWN, // 1239A..123FF 8337 CUNEIFORM, // 12400..1246E 8338 UNKNOWN, // 1246F 8339 CUNEIFORM, // 12470..12474 8340 UNKNOWN, // 12475..1247F 8341 CUNEIFORM, // 12480..12543 8342 UNKNOWN, // 12544..12F8F 8343 CYPRO_MINOAN, // 12F90..12FF2 8344 UNKNOWN, // 12FF3..12FFF 8345 EGYPTIAN_HIEROGLYPHS, // 13000..13455 8346 UNKNOWN, // 
13456..143FF 8347 ANATOLIAN_HIEROGLYPHS, // 14400..14646 8348 UNKNOWN, // 14647..167FF 8349 BAMUM, // 16800..16A38 8350 UNKNOWN, // 16A39..16A3F 8351 MRO, // 16A40..16A5E 8352 UNKNOWN, // 16A5F 8353 MRO, // 16A60..16A69 8354 UNKNOWN, // 16A6A..16A6D 8355 MRO, // 16A6E..16A6F 8356 TANGSA, // 16A70..16ABE 8357 UNKNOWN, // 16ABF 8358 TANGSA, // 16AC0..16AC9 8359 UNKNOWN, // 16ACA..16ACF 8360 BASSA_VAH, // 16AD0..16AED 8361 UNKNOWN, // 16AEE..16AEF 8362 BASSA_VAH, // 16AF0..16AF5 8363 UNKNOWN, // 16AF6..16AFF 8364 PAHAWH_HMONG, // 16B00..16B45 8365 UNKNOWN, // 16B46..16B4F 8366 PAHAWH_HMONG, // 16B50..16B59 8367 UNKNOWN, // 16B5A 8368 PAHAWH_HMONG, // 16B5B..16B61 8369 UNKNOWN, // 16B62 8370 PAHAWH_HMONG, // 16B63..16B77 8371 UNKNOWN, // 16B78..16B7C 8372 PAHAWH_HMONG, // 16B7D..16B8F 8373 UNKNOWN, // 16B90..16E3F 8374 MEDEFAIDRIN, // 16E40..16E9A 8375 UNKNOWN, // 16E9B..16EFF 8376 MIAO, // 16F00..16F4A 8377 UNKNOWN, // 16F4B..16F4E 8378 MIAO, // 16F4F..16F87 8379 UNKNOWN, // 16F88..16F8E 8380 MIAO, // 16F8F..16F9F 8381 UNKNOWN, // 16FA0..16FDF 8382 TANGUT, // 16FE0 8383 NUSHU, // 16FE1 8384 HAN, // 16FE2..16FE3 8385 KHITAN_SMALL_SCRIPT, // 16FE4 8386 UNKNOWN, // 16FE5..16FEF 8387 HAN, // 16FF0..16FF1 8388 UNKNOWN, // 16FF2..16FFF 8389 TANGUT, // 17000..187F7 8390 UNKNOWN, // 187F8..187FF 8391 TANGUT, // 18800..18AFF 8392 KHITAN_SMALL_SCRIPT, // 18B00..18CD5 8393 UNKNOWN, // 18CD6..18CFF 8394 TANGUT, // 18D00..18D08 8395 UNKNOWN, // 18D09..1AFEF 8396 KATAKANA, // 1AFF0..1AFF3 8397 UNKNOWN, // 1AFF4 8398 KATAKANA, // 1AFF5..1AFFB 8399 UNKNOWN, // 1AFFC 8400 KATAKANA, // 1AFFD..1AFFE 8401 UNKNOWN, // 1AFFF 8402 KATAKANA, // 1B000 8403 HIRAGANA, // 1B001..1B11F 8404 KATAKANA, // 1B120..1B122 8405 UNKNOWN, // 1B123..1B131 8406 HIRAGANA, // 1B132 8407 UNKNOWN, // 1B133..1B14F 8408 HIRAGANA, // 1B150..1B152 8409 UNKNOWN, // 1B153..1B154 8410 KATAKANA, // 1B155 8411 UNKNOWN, // 1B156..1B163 8412 KATAKANA, // 1B164..1B167 8413 UNKNOWN, // 1B168..1B16F 8414 NUSHU, // 
1B170..1B2FB 8415 UNKNOWN, // 1B2FC..1BBFF 8416 DUPLOYAN, // 1BC00..1BC6A 8417 UNKNOWN, // 1BC6B..1BC6F 8418 DUPLOYAN, // 1BC70..1BC7C 8419 UNKNOWN, // 1BC7D..1BC7F 8420 DUPLOYAN, // 1BC80..1BC88 8421 UNKNOWN, // 1BC89..1BC8F 8422 DUPLOYAN, // 1BC90..1BC99 8423 UNKNOWN, // 1BC9A..1BC9B 8424 DUPLOYAN, // 1BC9C..1BC9F 8425 COMMON, // 1BCA0..1BCA3 8426 UNKNOWN, // 1BCA4..1CEFF 8427 INHERITED, // 1CF00..1CF2D 8428 UNKNOWN, // 1CF2E..1CF2F 8429 INHERITED, // 1CF30..1CF46 8430 UNKNOWN, // 1CF47..1CF4F 8431 COMMON, // 1CF50..1CFC3 8432 UNKNOWN, // 1CFC4..1CFFF 8433 COMMON, // 1D000..1D0F5 8434 UNKNOWN, // 1D0F6..1D0FF 8435 COMMON, // 1D100..1D126 8436 UNKNOWN, // 1D127..1D128 8437 COMMON, // 1D129..1D166 8438 INHERITED, // 1D167..1D169 8439 COMMON, // 1D16A..1D17A 8440 INHERITED, // 1D17B..1D182 8441 COMMON, // 1D183..1D184 8442 INHERITED, // 1D185..1D18B 8443 COMMON, // 1D18C..1D1A9 8444 INHERITED, // 1D1AA..1D1AD 8445 COMMON, // 1D1AE..1D1EA 8446 UNKNOWN, // 1D1EB..1D1FF 8447 GREEK, // 1D200..1D245 8448 UNKNOWN, // 1D246..1D2BF 8449 COMMON, // 1D2C0..1D2D3 8450 UNKNOWN, // 1D2D4..1D2DF 8451 COMMON, // 1D2E0..1D2F3 8452 UNKNOWN, // 1D2F4..1D2FF 8453 COMMON, // 1D300..1D356 8454 UNKNOWN, // 1D357..1D35F 8455 COMMON, // 1D360..1D378 8456 UNKNOWN, // 1D379..1D3FF 8457 COMMON, // 1D400..1D454 8458 UNKNOWN, // 1D455 8459 COMMON, // 1D456..1D49C 8460 UNKNOWN, // 1D49D 8461 COMMON, // 1D49E..1D49F 8462 UNKNOWN, // 1D4A0..1D4A1 8463 COMMON, // 1D4A2 8464 UNKNOWN, // 1D4A3..1D4A4 8465 COMMON, // 1D4A5..1D4A6 8466 UNKNOWN, // 1D4A7..1D4A8 8467 COMMON, // 1D4A9..1D4AC 8468 UNKNOWN, // 1D4AD 8469 COMMON, // 1D4AE..1D4B9 8470 UNKNOWN, // 1D4BA 8471 COMMON, // 1D4BB 8472 UNKNOWN, // 1D4BC 8473 COMMON, // 1D4BD..1D4C3 8474 UNKNOWN, // 1D4C4 8475 COMMON, // 1D4C5..1D505 8476 UNKNOWN, // 1D506 8477 COMMON, // 1D507..1D50A 8478 UNKNOWN, // 1D50B..1D50C 8479 COMMON, // 1D50D..1D514 8480 UNKNOWN, // 1D515 8481 COMMON, // 1D516..1D51C 8482 UNKNOWN, // 1D51D 8483 COMMON, // 1D51E..1D539 8484 
UNKNOWN, // 1D53A 8485 COMMON, // 1D53B..1D53E 8486 UNKNOWN, // 1D53F 8487 COMMON, // 1D540..1D544 8488 UNKNOWN, // 1D545 8489 COMMON, // 1D546 8490 UNKNOWN, // 1D547..1D549 8491 COMMON, // 1D54A..1D550 8492 UNKNOWN, // 1D551 8493 COMMON, // 1D552..1D6A5 8494 UNKNOWN, // 1D6A6..1D6A7 8495 COMMON, // 1D6A8..1D7CB 8496 UNKNOWN, // 1D7CC..1D7CD 8497 COMMON, // 1D7CE..1D7FF 8498 SIGNWRITING, // 1D800..1DA8B 8499 UNKNOWN, // 1DA8C..1DA9A 8500 SIGNWRITING, // 1DA9B..1DA9F 8501 UNKNOWN, // 1DAA0 8502 SIGNWRITING, // 1DAA1..1DAAF 8503 UNKNOWN, // 1DAB0..1DEFF 8504 LATIN, // 1DF00..1DF1E 8505 UNKNOWN, // 1DF1F..1DF24 8506 LATIN, // 1DF25..1DF2A 8507 UNKNOWN, // 1DF2B..1DFFF 8508 GLAGOLITIC, // 1E000..1E006 8509 UNKNOWN, // 1E007 8510 GLAGOLITIC, // 1E008..1E018 8511 UNKNOWN, // 1E019..1E01A 8512 GLAGOLITIC, // 1E01B..1E021 8513 UNKNOWN, // 1E022 8514 GLAGOLITIC, // 1E023..1E024 8515 UNKNOWN, // 1E025 8516 GLAGOLITIC, // 1E026..1E02A 8517 UNKNOWN, // 1E02B..1E02F 8518 CYRILLIC, // 1E030..1E06D 8519 UNKNOWN, // 1E06E..1E08E 8520 CYRILLIC, // 1E08F 8521 UNKNOWN, // 1E090..1E0FF 8522 NYIAKENG_PUACHUE_HMONG, // 1E100..1E12C 8523 UNKNOWN, // 1E12D..1E12F 8524 NYIAKENG_PUACHUE_HMONG, // 1E130..1E13D 8525 UNKNOWN, // 1E13E..1E13F 8526 NYIAKENG_PUACHUE_HMONG, // 1E140..1E149 8527 UNKNOWN, // 1E14A..1E14D 8528 NYIAKENG_PUACHUE_HMONG, // 1E14E..1E14F 8529 UNKNOWN, // 1E150..1E28F 8530 TOTO, // 1E290..1E2AE 8531 UNKNOWN, // 1E2AF..1E2BF 8532 WANCHO, // 1E2C0..1E2F9 8533 UNKNOWN, // 1E2FA..1E2FE 8534 WANCHO, // 1E2FF 8535 UNKNOWN, // 1E300..1E4CF 8536 NAG_MUNDARI, // 1E4D0..1E4F9 8537 UNKNOWN, // 1E4FA..1E7DF 8538 ETHIOPIC, // 1E7E0..1E7E6 8539 UNKNOWN, // 1E7E7 8540 ETHIOPIC, // 1E7E8..1E7EB 8541 UNKNOWN, // 1E7EC 8542 ETHIOPIC, // 1E7ED..1E7EE 8543 UNKNOWN, // 1E7EF 8544 ETHIOPIC, // 1E7F0..1E7FE 8545 UNKNOWN, // 1E7FF 8546 MENDE_KIKAKUI, // 1E800..1E8C4 8547 UNKNOWN, // 1E8C5..1E8C6 8548 MENDE_KIKAKUI, // 1E8C7..1E8D6 8549 UNKNOWN, // 1E8D7..1E8FF 8550 ADLAM, // 1E900..1E94B 8551 
UNKNOWN, // 1E94C..1E94F 8552 ADLAM, // 1E950..1E959 8553 UNKNOWN, // 1E95A..1E95D 8554 ADLAM, // 1E95E..1E95F 8555 UNKNOWN, // 1E960..1EC70 8556 COMMON, // 1EC71..1ECB4 8557 UNKNOWN, // 1ECB5..1ED00 8558 COMMON, // 1ED01..1ED3D 8559 UNKNOWN, // 1ED3E..1EDFF 8560 ARABIC, // 1EE00..1EE03 8561 UNKNOWN, // 1EE04 8562 ARABIC, // 1EE05..1EE1F 8563 UNKNOWN, // 1EE20 8564 ARABIC, // 1EE21..1EE22 8565 UNKNOWN, // 1EE23 8566 ARABIC, // 1EE24 8567 UNKNOWN, // 1EE25..1EE26 8568 ARABIC, // 1EE27 8569 UNKNOWN, // 1EE28 8570 ARABIC, // 1EE29..1EE32 8571 UNKNOWN, // 1EE33 8572 ARABIC, // 1EE34..1EE37 8573 UNKNOWN, // 1EE38 8574 ARABIC, // 1EE39 8575 UNKNOWN, // 1EE3A 8576 ARABIC, // 1EE3B 8577 UNKNOWN, // 1EE3C..1EE41 8578 ARABIC, // 1EE42 8579 UNKNOWN, // 1EE43..1EE46 8580 ARABIC, // 1EE47 8581 UNKNOWN, // 1EE48 8582 ARABIC, // 1EE49 8583 UNKNOWN, // 1EE4A 8584 ARABIC, // 1EE4B 8585 UNKNOWN, // 1EE4C 8586 ARABIC, // 1EE4D..1EE4F 8587 UNKNOWN, // 1EE50 8588 ARABIC, // 1EE51..1EE52 8589 UNKNOWN, // 1EE53 8590 ARABIC, // 1EE54 8591 UNKNOWN, // 1EE55..1EE56 8592 ARABIC, // 1EE57 8593 UNKNOWN, // 1EE58 8594 ARABIC, // 1EE59 8595 UNKNOWN, // 1EE5A 8596 ARABIC, // 1EE5B 8597 UNKNOWN, // 1EE5C 8598 ARABIC, // 1EE5D 8599 UNKNOWN, // 1EE5E 8600 ARABIC, // 1EE5F 8601 UNKNOWN, // 1EE60 8602 ARABIC, // 1EE61..1EE62 8603 UNKNOWN, // 1EE63 8604 ARABIC, // 1EE64 8605 UNKNOWN, // 1EE65..1EE66 8606 ARABIC, // 1EE67..1EE6A 8607 UNKNOWN, // 1EE6B 8608 ARABIC, // 1EE6C..1EE72 8609 UNKNOWN, // 1EE73 8610 ARABIC, // 1EE74..1EE77 8611 UNKNOWN, // 1EE78 8612 ARABIC, // 1EE79..1EE7C 8613 UNKNOWN, // 1EE7D 8614 ARABIC, // 1EE7E 8615 UNKNOWN, // 1EE7F 8616 ARABIC, // 1EE80..1EE89 8617 UNKNOWN, // 1EE8A 8618 ARABIC, // 1EE8B..1EE9B 8619 UNKNOWN, // 1EE9C..1EEA0 8620 ARABIC, // 1EEA1..1EEA3 8621 UNKNOWN, // 1EEA4 8622 ARABIC, // 1EEA5..1EEA9 8623 UNKNOWN, // 1EEAA 8624 ARABIC, // 1EEAB..1EEBB 8625 UNKNOWN, // 1EEBC..1EEEF 8626 ARABIC, // 1EEF0..1EEF1 8627 UNKNOWN, // 1EEF2..1EFFF 8628 COMMON, // 1F000..1F02B 
8629 UNKNOWN, // 1F02C..1F02F 8630 COMMON, // 1F030..1F093 8631 UNKNOWN, // 1F094..1F09F 8632 COMMON, // 1F0A0..1F0AE 8633 UNKNOWN, // 1F0AF..1F0B0 8634 COMMON, // 1F0B1..1F0BF 8635 UNKNOWN, // 1F0C0 8636 COMMON, // 1F0C1..1F0CF 8637 UNKNOWN, // 1F0D0 8638 COMMON, // 1F0D1..1F0F5 8639 UNKNOWN, // 1F0F6..1F0FF 8640 COMMON, // 1F100..1F1AD 8641 UNKNOWN, // 1F1AE..1F1E5 8642 COMMON, // 1F1E6..1F1FF 8643 HIRAGANA, // 1F200 8644 COMMON, // 1F201..1F202 8645 UNKNOWN, // 1F203..1F20F 8646 COMMON, // 1F210..1F23B 8647 UNKNOWN, // 1F23C..1F23F 8648 COMMON, // 1F240..1F248 8649 UNKNOWN, // 1F249..1F24F 8650 COMMON, // 1F250..1F251 8651 UNKNOWN, // 1F252..1F25F 8652 COMMON, // 1F260..1F265 8653 UNKNOWN, // 1F266..1F2FF 8654 COMMON, // 1F300..1F6D7 8655 UNKNOWN, // 1F6D8..1F6DB 8656 COMMON, // 1F6DC..1F6EC 8657 UNKNOWN, // 1F6ED..1F6EF 8658 COMMON, // 1F6F0..1F6FC 8659 UNKNOWN, // 1F6FD..1F6FF 8660 COMMON, // 1F700..1F776 8661 UNKNOWN, // 1F777..1F77A 8662 COMMON, // 1F77B..1F7D9 8663 UNKNOWN, // 1F7DA..1F7DF 8664 COMMON, // 1F7E0..1F7EB 8665 UNKNOWN, // 1F7EC..1F7EF 8666 COMMON, // 1F7F0 8667 UNKNOWN, // 1F7F1..1F7FF 8668 COMMON, // 1F800..1F80B 8669 UNKNOWN, // 1F80C..1F80F 8670 COMMON, // 1F810..1F847 8671 UNKNOWN, // 1F848..1F84F 8672 COMMON, // 1F850..1F859 8673 UNKNOWN, // 1F85A..1F85F 8674 COMMON, // 1F860..1F887 8675 UNKNOWN, // 1F888..1F88F 8676 COMMON, // 1F890..1F8AD 8677 UNKNOWN, // 1F8AE..1F8AF 8678 COMMON, // 1F8B0..1F8B1 8679 UNKNOWN, // 1F8B2..1F8FF 8680 COMMON, // 1F900..1FA53 8681 UNKNOWN, // 1FA54..1FA5F 8682 COMMON, // 1FA60..1FA6D 8683 UNKNOWN, // 1FA6E..1FA6F 8684 COMMON, // 1FA70..1FA7C 8685 UNKNOWN, // 1FA7D..1FA7F 8686 COMMON, // 1FA80..1FA88 8687 UNKNOWN, // 1FA89..1FA8F 8688 COMMON, // 1FA90..1FABD 8689 UNKNOWN, // 1FABE 8690 COMMON, // 1FABF..1FAC5 8691 UNKNOWN, // 1FAC6..1FACD 8692 COMMON, // 1FACE..1FADB 8693 UNKNOWN, // 1FADC..1FADF 8694 COMMON, // 1FAE0..1FAE8 8695 UNKNOWN, // 1FAE9..1FAEF 8696 COMMON, // 1FAF0..1FAF8 8697 UNKNOWN, // 
1FAF9..1FAFF 8698 COMMON, // 1FB00..1FB92 8699 UNKNOWN, // 1FB93 8700 COMMON, // 1FB94..1FBCA 8701 UNKNOWN, // 1FBCB..1FBEF 8702 COMMON, // 1FBF0..1FBF9 8703 UNKNOWN, // 1FBFA..1FFFF 8704 HAN, // 20000..2A6DF 8705 UNKNOWN, // 2A6E0..2A6FF 8706 HAN, // 2A700..2B739 8707 UNKNOWN, // 2B73A..2B73F 8708 HAN, // 2B740..2B81D 8709 UNKNOWN, // 2B81E..2B81F 8710 HAN, // 2B820..2CEA1 8711 UNKNOWN, // 2CEA2..2CEAF 8712 HAN, // 2CEB0..2EBE0 8713 UNKNOWN, // 2EBE1..2EBEF 8714 HAN, // 2EBF0..2EE5D 8715 UNKNOWN, // 2EE5E..2F7FF 8716 HAN, // 2F800..2FA1D 8717 UNKNOWN, // 2FA1E..2FFFF 8718 HAN, // 30000..3134A 8719 UNKNOWN, // 3134B..3134F 8720 HAN, // 31350..323AF 8721 UNKNOWN, // 323B0..E0000 8722 COMMON, // E0001 8723 UNKNOWN, // E0002..E001F 8724 COMMON, // E0020..E007F 8725 UNKNOWN, // E0080..E00FF 8726 INHERITED, // E0100..E01EF 8727 UNKNOWN, // E01F0..10FFFF 8728 }; 8729 8730 private static final HashMap<String, Character.UnicodeScript> aliases; 8731 static { 8732 aliases = HashMap.newHashMap(UNKNOWN.ordinal() + 1); 8733 aliases.put("ADLM", ADLAM); 8734 aliases.put("AGHB", CAUCASIAN_ALBANIAN); 8735 aliases.put("AHOM", AHOM); 8736 aliases.put("ARAB", ARABIC); 8737 aliases.put("ARMI", IMPERIAL_ARAMAIC); 8738 aliases.put("ARMN", ARMENIAN); 8739 aliases.put("AVST", AVESTAN); 8740 aliases.put("BALI", BALINESE); 8741 aliases.put("BAMU", BAMUM); 8742 aliases.put("BASS", BASSA_VAH); 8743 aliases.put("BATK", BATAK); 8744 aliases.put("BENG", BENGALI); 8745 aliases.put("BHKS", BHAIKSUKI); 8746 aliases.put("BOPO", BOPOMOFO); 8747 aliases.put("BRAH", BRAHMI); 8748 aliases.put("BRAI", BRAILLE); 8749 aliases.put("BUGI", BUGINESE); 8750 aliases.put("BUHD", BUHID); 8751 aliases.put("CAKM", CHAKMA); 8752 aliases.put("CANS", CANADIAN_ABORIGINAL); 8753 aliases.put("CARI", CARIAN); 8754 aliases.put("CHAM", CHAM); 8755 aliases.put("CHER", CHEROKEE); 8756 aliases.put("CHRS", CHORASMIAN); 8757 aliases.put("COPT", COPTIC); 8758 aliases.put("CPMN", CYPRO_MINOAN); 8759 aliases.put("CPRT", CYPRIOT); 
8760 aliases.put("CYRL", CYRILLIC); 8761 aliases.put("DEVA", DEVANAGARI); 8762 aliases.put("DIAK", DIVES_AKURU); 8763 aliases.put("DOGR", DOGRA); 8764 aliases.put("DSRT", DESERET); 8765 aliases.put("DUPL", DUPLOYAN); 8766 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 8767 aliases.put("ELBA", ELBASAN); 8768 aliases.put("ELYM", ELYMAIC); 8769 aliases.put("ETHI", ETHIOPIC); 8770 aliases.put("GEOR", GEORGIAN); 8771 aliases.put("GLAG", GLAGOLITIC); 8772 aliases.put("GONG", GUNJALA_GONDI); 8773 aliases.put("GONM", MASARAM_GONDI); 8774 aliases.put("GOTH", GOTHIC); 8775 aliases.put("GRAN", GRANTHA); 8776 aliases.put("GREK", GREEK); 8777 aliases.put("GUJR", GUJARATI); 8778 aliases.put("GURU", GURMUKHI); 8779 aliases.put("HANG", HANGUL); 8780 aliases.put("HANI", HAN); 8781 aliases.put("HANO", HANUNOO); 8782 aliases.put("HATR", HATRAN); 8783 aliases.put("HEBR", HEBREW); 8784 aliases.put("HIRA", HIRAGANA); 8785 aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS); 8786 aliases.put("HMNG", PAHAWH_HMONG); 8787 aliases.put("HMNP", NYIAKENG_PUACHUE_HMONG); 8788 aliases.put("HUNG", OLD_HUNGARIAN); 8789 aliases.put("ITAL", OLD_ITALIC); 8790 aliases.put("JAVA", JAVANESE); 8791 aliases.put("KALI", KAYAH_LI); 8792 aliases.put("KANA", KATAKANA); 8793 aliases.put("KAWI", KAWI); 8794 aliases.put("KHAR", KHAROSHTHI); 8795 aliases.put("KHMR", KHMER); 8796 aliases.put("KHOJ", KHOJKI); 8797 aliases.put("KITS", KHITAN_SMALL_SCRIPT); 8798 aliases.put("KNDA", KANNADA); 8799 aliases.put("KTHI", KAITHI); 8800 aliases.put("LANA", TAI_THAM); 8801 aliases.put("LAOO", LAO); 8802 aliases.put("LATN", LATIN); 8803 aliases.put("LEPC", LEPCHA); 8804 aliases.put("LIMB", LIMBU); 8805 aliases.put("LINA", LINEAR_A); 8806 aliases.put("LINB", LINEAR_B); 8807 aliases.put("LISU", LISU); 8808 aliases.put("LYCI", LYCIAN); 8809 aliases.put("LYDI", LYDIAN); 8810 aliases.put("MAHJ", MAHAJANI); 8811 aliases.put("MAKA", MAKASAR); 8812 aliases.put("MAND", MANDAIC); 8813 aliases.put("MANI", MANICHAEAN); 8814 aliases.put("MARC", 
MARCHEN); 8815 aliases.put("MEDF", MEDEFAIDRIN); 8816 aliases.put("MEND", MENDE_KIKAKUI); 8817 aliases.put("MERC", MEROITIC_CURSIVE); 8818 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 8819 aliases.put("MLYM", MALAYALAM); 8820 aliases.put("MODI", MODI); 8821 aliases.put("MONG", MONGOLIAN); 8822 aliases.put("MROO", MRO); 8823 aliases.put("MTEI", MEETEI_MAYEK); 8824 aliases.put("MULT", MULTANI); 8825 aliases.put("MYMR", MYANMAR); 8826 aliases.put("NAGM", NAG_MUNDARI); 8827 aliases.put("NAND", NANDINAGARI); 8828 aliases.put("NARB", OLD_NORTH_ARABIAN); 8829 aliases.put("NBAT", NABATAEAN); 8830 aliases.put("NEWA", NEWA); 8831 aliases.put("NKOO", NKO); 8832 aliases.put("NSHU", NUSHU); 8833 aliases.put("OGAM", OGHAM); 8834 aliases.put("OLCK", OL_CHIKI); 8835 aliases.put("ORKH", OLD_TURKIC); 8836 aliases.put("ORYA", ORIYA); 8837 aliases.put("OSGE", OSAGE); 8838 aliases.put("OSMA", OSMANYA); 8839 aliases.put("OUGR", OLD_UYGHUR); 8840 aliases.put("PALM", PALMYRENE); 8841 aliases.put("PAUC", PAU_CIN_HAU); 8842 aliases.put("PERM", OLD_PERMIC); 8843 aliases.put("PHAG", PHAGS_PA); 8844 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 8845 aliases.put("PHLP", PSALTER_PAHLAVI); 8846 aliases.put("PHNX", PHOENICIAN); 8847 aliases.put("PLRD", MIAO); 8848 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 8849 aliases.put("RJNG", REJANG); 8850 aliases.put("ROHG", HANIFI_ROHINGYA); 8851 aliases.put("RUNR", RUNIC); 8852 aliases.put("SAMR", SAMARITAN); 8853 aliases.put("SARB", OLD_SOUTH_ARABIAN); 8854 aliases.put("SAUR", SAURASHTRA); 8855 aliases.put("SGNW", SIGNWRITING); 8856 aliases.put("SHAW", SHAVIAN); 8857 aliases.put("SHRD", SHARADA); 8858 aliases.put("SIDD", SIDDHAM); 8859 aliases.put("SIND", KHUDAWADI); 8860 aliases.put("SINH", SINHALA); 8861 aliases.put("SOGD", SOGDIAN); 8862 aliases.put("SOGO", OLD_SOGDIAN); 8863 aliases.put("SORA", SORA_SOMPENG); 8864 aliases.put("SOYO", SOYOMBO); 8865 aliases.put("SUND", SUNDANESE); 8866 aliases.put("SYLO", SYLOTI_NAGRI); 8867 aliases.put("SYRC", SYRIAC); 
8868 aliases.put("TAGB", TAGBANWA); 8869 aliases.put("TAKR", TAKRI); 8870 aliases.put("TALE", TAI_LE); 8871 aliases.put("TALU", NEW_TAI_LUE); 8872 aliases.put("TAML", TAMIL); 8873 aliases.put("TANG", TANGUT); 8874 aliases.put("TAVT", TAI_VIET); 8875 aliases.put("TELU", TELUGU); 8876 aliases.put("TFNG", TIFINAGH); 8877 aliases.put("TGLG", TAGALOG); 8878 aliases.put("THAA", THAANA); 8879 aliases.put("THAI", THAI); 8880 aliases.put("TIBT", TIBETAN); 8881 aliases.put("TIRH", TIRHUTA); 8882 aliases.put("TNSA", TANGSA); 8883 aliases.put("TOTO", TOTO); 8884 aliases.put("UGAR", UGARITIC); 8885 aliases.put("VAII", VAI); 8886 aliases.put("VITH", VITHKUQI); 8887 aliases.put("WARA", WARANG_CITI); 8888 aliases.put("WCHO", WANCHO); 8889 aliases.put("XPEO", OLD_PERSIAN); 8890 aliases.put("XSUX", CUNEIFORM); 8891 aliases.put("YEZI", YEZIDI); 8892 aliases.put("YIII", YI); 8893 aliases.put("ZANB", ZANABAZAR_SQUARE); 8894 aliases.put("ZINH", INHERITED); 8895 aliases.put("ZYYY", COMMON); 8896 aliases.put("ZZZZ", UNKNOWN); 8897 } 8898 8899 /** 8900 * Returns the enum constant representing the Unicode script of which 8901 * the given character (Unicode code point) is assigned to. 8902 * 8903 * @param codePoint the character (Unicode code point) in question. 8904 * @return The {@code UnicodeScript} constant representing the 8905 * Unicode script of which this character is assigned to. 8906 * 8907 * @throws IllegalArgumentException if the specified 8908 * {@code codePoint} is an invalid Unicode code point. 
8909 * @see Character#isValidCodePoint(int) 8910 * 8911 */ 8912 public static UnicodeScript of(int codePoint) { 8913 if (!isValidCodePoint(codePoint)) 8914 throw new IllegalArgumentException( 8915 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 8916 int type = getType(codePoint); 8917 // leave SURROGATE and PRIVATE_USE for table lookup 8918 if (type == UNASSIGNED) 8919 return UNKNOWN; 8920 int index = Arrays.binarySearch(scriptStarts, codePoint); 8921 if (index < 0) 8922 index = -index - 2; 8923 return scripts[index]; 8924 } 8925 8926 /** 8927 * Returns the UnicodeScript constant with the given Unicode script 8928 * name or the script name alias. Script names and their aliases are 8929 * determined by The Unicode Standard. The files {@code Scripts.txt} 8930 * and {@code PropertyValueAliases.txt} define script names 8931 * and the script name aliases for a particular version of the 8932 * standard. The {@link Character} class specifies the version of 8933 * the standard that it supports. 8934 * <p> 8935 * Character case is ignored for all of the valid script names. 8936 * The en_US locale's case mapping rules are used to provide 8937 * case-insensitive string comparisons for script name validation. 8938 * 8939 * @param scriptName A {@code UnicodeScript} name. 8940 * @return The {@code UnicodeScript} constant identified 8941 * by {@code scriptName} 8942 * @throws IllegalArgumentException if {@code scriptName} is an 8943 * invalid name 8944 * @throws NullPointerException if {@code scriptName} is null 8945 */ 8946 public static final UnicodeScript forName(String scriptName) { 8947 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 8948 //.replace(' ', '_')); 8949 UnicodeScript sc = aliases.get(scriptName); 8950 if (sc != null) 8951 return sc; 8952 return valueOf(scriptName); 8953 } 8954 } 8955 8956 /** 8957 * The value of the {@code Character}. 
8958 * 8959 * @serial 8960 */ 8961 private final char value; 8962 8963 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 8964 @java.io.Serial 8965 private static final long serialVersionUID = 3786198910865385080L; 8966 8967 /** 8968 * Constructs a newly allocated {@code Character} object that 8969 * represents the specified {@code char} value. 8970 * 8971 * @param value the value to be represented by the 8972 * {@code Character} object. 8973 * 8974 * @deprecated 8975 * It is rarely appropriate to use this constructor. The static factory 8976 * {@link #valueOf(char)} is generally a better choice, as it is 8977 * likely to yield significantly better space and time performance. 8978 */ 8979 @Deprecated(since="9", forRemoval = true) 8980 public Character(char value) { 8981 this.value = value; 8982 } 8983 8984 private static final class CharacterCache { 8985 private CharacterCache(){} 8986 8987 @Stable 8988 static final Character[] cache; 8989 static Character[] archivedCache; 8990 8991 static { 8992 int size = 127 + 1; 8993 8994 // Load and use the archived cache if it exists 8995 CDS.initializeFromArchive(CharacterCache.class); 8996 if (archivedCache == null) { 8997 Character[] c = new Character[size]; 8998 for (int i = 0; i < size; i++) { 8999 c[i] = new Character((char) i); 9000 } 9001 archivedCache = c; 9002 } 9003 cache = archivedCache; 9004 assert cache.length == size; 9005 } 9006 } 9007 9008 /** 9009 * Returns a {@code Character} instance representing the specified 9010 * {@code char} value. 9011 * If a new {@code Character} instance is not required, this method 9012 * should generally be used in preference to the constructor 9013 * {@link #Character(char)}, as this method is likely to yield 9014 * significantly better space and time performance by caching 9015 * frequently requested values. 
9016 * 9017 * This method will always cache values in the range {@code 9018 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 9019 * cache other values outside of this range. 9020 * 9021 * @param c a char value. 9022 * @return a {@code Character} instance representing {@code c}. 9023 * @since 1.5 9024 */ 9025 @IntrinsicCandidate 9026 @DeserializeConstructor 9027 public static Character valueOf(char c) { 9028 if (c <= 127) { // must cache 9029 return CharacterCache.cache[(int)c]; 9030 } 9031 return new Character(c); 9032 } 9033 9034 /** 9035 * Returns the value of this {@code Character} object. 9036 * @return the primitive {@code char} value represented by 9037 * this object. 9038 */ 9039 @IntrinsicCandidate 9040 public char charValue() { 9041 return value; 9042 } 9043 9044 /** 9045 * Returns a hash code for this {@code Character}; equal to the result 9046 * of invoking {@code charValue()}. 9047 * 9048 * @return a hash code value for this {@code Character} 9049 */ 9050 @Override 9051 public int hashCode() { 9052 return Character.hashCode(value); 9053 } 9054 9055 /** 9056 * Returns a hash code for a {@code char} value; compatible with 9057 * {@code Character.hashCode()}. 9058 * 9059 * @since 1.8 9060 * 9061 * @param value The {@code char} for which to return a hash code. 9062 * @return a hash code value for a {@code char} value. 9063 */ 9064 public static int hashCode(char value) { 9065 return (int)value; 9066 } 9067 9068 /** 9069 * Compares this object against the specified object. 9070 * The result is {@code true} if and only if the argument is not 9071 * {@code null} and is a {@code Character} object that 9072 * represents the same {@code char} value as this object. 9073 * 9074 * @param obj the object to compare with. 9075 * @return {@code true} if the objects are the same; 9076 * {@code false} otherwise. 
9077 */ 9078 public boolean equals(Object obj) { 9079 if (obj instanceof Character c) { 9080 return value == c.charValue(); 9081 } 9082 return false; 9083 } 9084 9085 /** 9086 * Returns a {@code String} object representing this 9087 * {@code Character}'s value. The result is a string of 9088 * length 1 whose sole component is the primitive 9089 * {@code char} value represented by this 9090 * {@code Character} object. 9091 * 9092 * @return a string representation of this object. 9093 */ 9094 @Override 9095 public String toString() { 9096 return String.valueOf(value); 9097 } 9098 9099 /** 9100 * Returns a {@code String} object representing the 9101 * specified {@code char}. The result is a string of length 9102 * 1 consisting solely of the specified {@code char}. 9103 * 9104 * @apiNote This method cannot handle <a 9105 * href="#supplementary"> supplementary characters</a>. To support 9106 * all Unicode characters, including supplementary characters, use 9107 * the {@link #toString(int)} method. 9108 * 9109 * @param c the {@code char} to be converted 9110 * @return the string representation of the specified {@code char} 9111 * @since 1.4 9112 */ 9113 public static String toString(char c) { 9114 return String.valueOf(c); 9115 } 9116 9117 /** 9118 * Returns a {@code String} object representing the 9119 * specified character (Unicode code point). The result is a string of 9120 * length 1 or 2, consisting solely of the specified {@code codePoint}. 9121 * 9122 * @param codePoint the {@code codePoint} to be converted 9123 * @return the string representation of the specified {@code codePoint} 9124 * @throws IllegalArgumentException if the specified 9125 * {@code codePoint} is not a {@linkplain #isValidCodePoint 9126 * valid Unicode code point}. 
9127 * @since 11 9128 */ 9129 public static String toString(int codePoint) { 9130 return String.valueOfCodePoint(codePoint); 9131 } 9132 9133 /** 9134 * Determines whether the specified code point is a valid 9135 * <a href="http://www.unicode.org/glossary/#code_point"> 9136 * Unicode code point value</a>. 9137 * 9138 * @param codePoint the Unicode code point to be tested 9139 * @return {@code true} if the specified code point value is between 9140 * {@link #MIN_CODE_POINT} and 9141 * {@link #MAX_CODE_POINT} inclusive; 9142 * {@code false} otherwise. 9143 * @since 1.5 9144 */ 9145 public static boolean isValidCodePoint(int codePoint) { 9146 // Optimized form of: 9147 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 9148 int plane = codePoint >>> 16; 9149 return plane < ((MAX_CODE_POINT + 1) >>> 16); 9150 } 9151 9152 /** 9153 * Determines whether the specified character (Unicode code point) 9154 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 9155 * Such code points can be represented using a single {@code char}. 9156 * 9157 * @param codePoint the character (Unicode code point) to be tested 9158 * @return {@code true} if the specified code point is between 9159 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 9160 * {@code false} otherwise. 9161 * @since 1.7 9162 */ 9163 public static boolean isBmpCodePoint(int codePoint) { 9164 return codePoint >>> 16 == 0; 9165 // Optimized form of: 9166 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 9167 // We consistently use logical shift (>>>) to facilitate 9168 // additional runtime optimizations. 9169 } 9170 9171 /** 9172 * Determines whether the specified character (Unicode code point) 9173 * is in the <a href="#supplementary">supplementary character</a> range. 
9174 * 9175 * @param codePoint the character (Unicode code point) to be tested 9176 * @return {@code true} if the specified code point is between 9177 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 9178 * {@link #MAX_CODE_POINT} inclusive; 9179 * {@code false} otherwise. 9180 * @since 1.5 9181 */ 9182 public static boolean isSupplementaryCodePoint(int codePoint) { 9183 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 9184 && codePoint < MAX_CODE_POINT + 1; 9185 } 9186 9187 /** 9188 * Determines if the given {@code char} value is a 9189 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9190 * Unicode high-surrogate code unit</a> 9191 * (also known as <i>leading-surrogate code unit</i>). 9192 * 9193 * <p>Such values do not represent characters by themselves, 9194 * but are used in the representation of 9195 * <a href="#supplementary">supplementary characters</a> 9196 * in the UTF-16 encoding. 9197 * 9198 * @param ch the {@code char} value to be tested. 9199 * @return {@code true} if the {@code char} value is between 9200 * {@link #MIN_HIGH_SURROGATE} and 9201 * {@link #MAX_HIGH_SURROGATE} inclusive; 9202 * {@code false} otherwise. 9203 * @see Character#isLowSurrogate(char) 9204 * @see Character.UnicodeBlock#of(int) 9205 * @since 1.5 9206 */ 9207 public static boolean isHighSurrogate(char ch) { 9208 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 9209 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 9210 } 9211 9212 /** 9213 * Determines if the given {@code char} value is a 9214 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9215 * Unicode low-surrogate code unit</a> 9216 * (also known as <i>trailing-surrogate code unit</i>). 9217 * 9218 * <p>Such values do not represent characters by themselves, 9219 * but are used in the representation of 9220 * <a href="#supplementary">supplementary characters</a> 9221 * in the UTF-16 encoding. 
9222 * 9223 * @param ch the {@code char} value to be tested. 9224 * @return {@code true} if the {@code char} value is between 9225 * {@link #MIN_LOW_SURROGATE} and 9226 * {@link #MAX_LOW_SURROGATE} inclusive; 9227 * {@code false} otherwise. 9228 * @see Character#isHighSurrogate(char) 9229 * @since 1.5 9230 */ 9231 public static boolean isLowSurrogate(char ch) { 9232 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 9233 } 9234 9235 /** 9236 * Determines if the given {@code char} value is a Unicode 9237 * <i>surrogate code unit</i>. 9238 * 9239 * <p>Such values do not represent characters by themselves, 9240 * but are used in the representation of 9241 * <a href="#supplementary">supplementary characters</a> 9242 * in the UTF-16 encoding. 9243 * 9244 * <p>A char value is a surrogate code unit if and only if it is either 9245 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 9246 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 9247 * 9248 * @param ch the {@code char} value to be tested. 9249 * @return {@code true} if the {@code char} value is between 9250 * {@link #MIN_SURROGATE} and 9251 * {@link #MAX_SURROGATE} inclusive; 9252 * {@code false} otherwise. 9253 * @since 1.7 9254 */ 9255 public static boolean isSurrogate(char ch) { 9256 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 9257 } 9258 9259 /** 9260 * Determines whether the specified pair of {@code char} 9261 * values is a valid 9262 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9263 * Unicode surrogate pair</a>. 
9264 * 9265 * <p>This method is equivalent to the expression: 9266 * <blockquote><pre>{@code 9267 * isHighSurrogate(high) && isLowSurrogate(low) 9268 * }</pre></blockquote> 9269 * 9270 * @param high the high-surrogate code value to be tested 9271 * @param low the low-surrogate code value to be tested 9272 * @return {@code true} if the specified high and 9273 * low-surrogate code values represent a valid surrogate pair; 9274 * {@code false} otherwise. 9275 * @since 1.5 9276 */ 9277 public static boolean isSurrogatePair(char high, char low) { 9278 return isHighSurrogate(high) && isLowSurrogate(low); 9279 } 9280 9281 /** 9282 * Determines the number of {@code char} values needed to 9283 * represent the specified character (Unicode code point). If the 9284 * specified character is equal to or greater than 0x10000, then 9285 * the method returns 2. Otherwise, the method returns 1. 9286 * 9287 * <p>This method doesn't validate the specified character to be a 9288 * valid Unicode code point. The caller must validate the 9289 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 9290 * if necessary. 9291 * 9292 * @param codePoint the character (Unicode code point) to be tested. 9293 * @return 2 if the character is a valid supplementary character; 1 otherwise. 9294 * @see Character#isSupplementaryCodePoint(int) 9295 * @since 1.5 9296 */ 9297 public static int charCount(int codePoint) { 9298 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 9299 } 9300 9301 /** 9302 * Converts the specified surrogate pair to its supplementary code 9303 * point value. This method does not validate the specified 9304 * surrogate pair. The caller must validate it using {@link 9305 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 9306 * 9307 * @param high the high-surrogate code unit 9308 * @param low the low-surrogate code unit 9309 * @return the supplementary code point composed from the 9310 * specified surrogate pair. 
9311 * @since 1.5 9312 */ 9313 public static int toCodePoint(char high, char low) { 9314 // Optimized form of: 9315 // return ((high - MIN_HIGH_SURROGATE) << 10) 9316 // + (low - MIN_LOW_SURROGATE) 9317 // + MIN_SUPPLEMENTARY_CODE_POINT; 9318 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 9319 - (MIN_HIGH_SURROGATE << 10) 9320 - MIN_LOW_SURROGATE); 9321 } 9322 9323 /** 9324 * Returns the code point at the given index of the 9325 * {@code CharSequence}. If the {@code char} value at 9326 * the given index in the {@code CharSequence} is in the 9327 * high-surrogate range, the following index is less than the 9328 * length of the {@code CharSequence}, and the 9329 * {@code char} value at the following index is in the 9330 * low-surrogate range, then the supplementary code point 9331 * corresponding to this surrogate pair is returned. Otherwise, 9332 * the {@code char} value at the given index is returned. 9333 * 9334 * @param seq a sequence of {@code char} values (Unicode code 9335 * units) 9336 * @param index the index to the {@code char} values (Unicode 9337 * code units) in {@code seq} to be converted 9338 * @return the Unicode code point at the given index 9339 * @throws NullPointerException if {@code seq} is null. 9340 * @throws IndexOutOfBoundsException if the value 9341 * {@code index} is negative or not less than 9342 * {@link CharSequence#length() seq.length()}. 9343 * @since 1.5 9344 */ 9345 public static int codePointAt(CharSequence seq, int index) { 9346 char c1 = seq.charAt(index); 9347 if (isHighSurrogate(c1) && ++index < seq.length()) { 9348 char c2 = seq.charAt(index); 9349 if (isLowSurrogate(c2)) { 9350 return toCodePoint(c1, c2); 9351 } 9352 } 9353 return c1; 9354 } 9355 9356 /** 9357 * Returns the code point at the given index of the 9358 * {@code char} array. 
If the {@code char} value at 9359 * the given index in the {@code char} array is in the 9360 * high-surrogate range, the following index is less than the 9361 * length of the {@code char} array, and the 9362 * {@code char} value at the following index is in the 9363 * low-surrogate range, then the supplementary code point 9364 * corresponding to this surrogate pair is returned. Otherwise, 9365 * the {@code char} value at the given index is returned. 9366 * 9367 * @param a the {@code char} array 9368 * @param index the index to the {@code char} values (Unicode 9369 * code units) in the {@code char} array to be converted 9370 * @return the Unicode code point at the given index 9371 * @throws NullPointerException if {@code a} is null. 9372 * @throws IndexOutOfBoundsException if the value 9373 * {@code index} is negative or not less than 9374 * the length of the {@code char} array. 9375 * @since 1.5 9376 */ 9377 public static int codePointAt(char[] a, int index) { 9378 return codePointAtImpl(a, index, a.length); 9379 } 9380 9381 /** 9382 * Returns the code point at the given index of the 9383 * {@code char} array, where only array elements with 9384 * {@code index} less than {@code limit} can be used. If 9385 * the {@code char} value at the given index in the 9386 * {@code char} array is in the high-surrogate range, the 9387 * following index is less than the {@code limit}, and the 9388 * {@code char} value at the following index is in the 9389 * low-surrogate range, then the supplementary code point 9390 * corresponding to this surrogate pair is returned. Otherwise, 9391 * the {@code char} value at the given index is returned. 
9392 * 9393 * @param a the {@code char} array 9394 * @param index the index to the {@code char} values (Unicode 9395 * code units) in the {@code char} array to be converted 9396 * @param limit the index after the last array element that 9397 * can be used in the {@code char} array 9398 * @return the Unicode code point at the given index 9399 * @throws NullPointerException if {@code a} is null. 9400 * @throws IndexOutOfBoundsException if the {@code index} 9401 * argument is negative or not less than the {@code limit} 9402 * argument, or if the {@code limit} argument is negative or 9403 * greater than the length of the {@code char} array. 9404 * @since 1.5 9405 */ 9406 public static int codePointAt(char[] a, int index, int limit) { 9407 if (index >= limit || index < 0 || limit > a.length) { 9408 throw new IndexOutOfBoundsException(); 9409 } 9410 return codePointAtImpl(a, index, limit); 9411 } 9412 9413 // throws ArrayIndexOutOfBoundsException if index out of bounds 9414 static int codePointAtImpl(char[] a, int index, int limit) { 9415 char c1 = a[index]; 9416 if (isHighSurrogate(c1) && ++index < limit) { 9417 char c2 = a[index]; 9418 if (isLowSurrogate(c2)) { 9419 return toCodePoint(c1, c2); 9420 } 9421 } 9422 return c1; 9423 } 9424 9425 /** 9426 * Returns the code point preceding the given index of the 9427 * {@code CharSequence}. If the {@code char} value at 9428 * {@code (index - 1)} in the {@code CharSequence} is in 9429 * the low-surrogate range, {@code (index - 2)} is not 9430 * negative, and the {@code char} value at {@code (index - 2)} 9431 * in the {@code CharSequence} is in the 9432 * high-surrogate range, then the supplementary code point 9433 * corresponding to this surrogate pair is returned. Otherwise, 9434 * the {@code char} value at {@code (index - 1)} is 9435 * returned. 
9436 * 9437 * @param seq the {@code CharSequence} instance 9438 * @param index the index following the code point that should be returned 9439 * @return the Unicode code point value before the given index. 9440 * @throws NullPointerException if {@code seq} is null. 9441 * @throws IndexOutOfBoundsException if the {@code index} 9442 * argument is less than 1 or greater than {@link 9443 * CharSequence#length() seq.length()}. 9444 * @since 1.5 9445 */ 9446 public static int codePointBefore(CharSequence seq, int index) { 9447 char c2 = seq.charAt(--index); 9448 if (isLowSurrogate(c2) && index > 0) { 9449 char c1 = seq.charAt(--index); 9450 if (isHighSurrogate(c1)) { 9451 return toCodePoint(c1, c2); 9452 } 9453 } 9454 return c2; 9455 } 9456 9457 /** 9458 * Returns the code point preceding the given index of the 9459 * {@code char} array. If the {@code char} value at 9460 * {@code (index - 1)} in the {@code char} array is in 9461 * the low-surrogate range, {@code (index - 2)} is not 9462 * negative, and the {@code char} value at {@code (index - 2)} 9463 * in the {@code char} array is in the 9464 * high-surrogate range, then the supplementary code point 9465 * corresponding to this surrogate pair is returned. Otherwise, 9466 * the {@code char} value at {@code (index - 1)} is 9467 * returned. 9468 * 9469 * @param a the {@code char} array 9470 * @param index the index following the code point that should be returned 9471 * @return the Unicode code point value before the given index. 9472 * @throws NullPointerException if {@code a} is null. 
9473 * @throws IndexOutOfBoundsException if the {@code index} 9474 * argument is less than 1 or greater than the length of the 9475 * {@code char} array 9476 * @since 1.5 9477 */ 9478 public static int codePointBefore(char[] a, int index) { 9479 return codePointBeforeImpl(a, index, 0); 9480 } 9481 9482 /** 9483 * Returns the code point preceding the given index of the 9484 * {@code char} array, where only array elements with 9485 * {@code index} greater than or equal to {@code start} 9486 * can be used. If the {@code char} value at {@code (index - 1)} 9487 * in the {@code char} array is in the 9488 * low-surrogate range, {@code (index - 2)} is not less than 9489 * {@code start}, and the {@code char} value at 9490 * {@code (index - 2)} in the {@code char} array is in 9491 * the high-surrogate range, then the supplementary code point 9492 * corresponding to this surrogate pair is returned. Otherwise, 9493 * the {@code char} value at {@code (index - 1)} is 9494 * returned. 9495 * 9496 * @param a the {@code char} array 9497 * @param index the index following the code point that should be returned 9498 * @param start the index of the first array element in the 9499 * {@code char} array 9500 * @return the Unicode code point value before the given index. 9501 * @throws NullPointerException if {@code a} is null. 9502 * @throws IndexOutOfBoundsException if the {@code index} 9503 * argument is not greater than the {@code start} argument or 9504 * is greater than the length of the {@code char} array, or 9505 * if the {@code start} argument is negative or not less than 9506 * the length of the {@code char} array. 
9507 * @since 1.5 9508 */ 9509 public static int codePointBefore(char[] a, int index, int start) { 9510 if (index <= start || start < 0 || index > a.length) { 9511 throw new IndexOutOfBoundsException(); 9512 } 9513 return codePointBeforeImpl(a, index, start); 9514 } 9515 9516 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds 9517 static int codePointBeforeImpl(char[] a, int index, int start) { 9518 char c2 = a[--index]; 9519 if (isLowSurrogate(c2) && index > start) { 9520 char c1 = a[--index]; 9521 if (isHighSurrogate(c1)) { 9522 return toCodePoint(c1, c2); 9523 } 9524 } 9525 return c2; 9526 } 9527 9528 /** 9529 * Returns the leading surrogate (a 9530 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9531 * high surrogate code unit</a>) of the 9532 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9533 * surrogate pair</a> 9534 * representing the specified supplementary character (Unicode 9535 * code point) in the UTF-16 encoding. If the specified character 9536 * is not a 9537 * <a href="Character.html#supplementary">supplementary character</a>, 9538 * an unspecified {@code char} is returned. 9539 * 9540 * <p>If 9541 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9542 * is {@code true}, then 9543 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 9544 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 9545 * are also always {@code true}. 
9546 * 9547 * @param codePoint a supplementary character (Unicode code point) 9548 * @return the leading surrogate code unit used to represent the 9549 * character in the UTF-16 encoding 9550 * @since 1.7 9551 */ 9552 public static char highSurrogate(int codePoint) { 9553 return (char) ((codePoint >>> 10) 9554 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 9555 } 9556 9557 /** 9558 * Returns the trailing surrogate (a 9559 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9560 * low surrogate code unit</a>) of the 9561 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9562 * surrogate pair</a> 9563 * representing the specified supplementary character (Unicode 9564 * code point) in the UTF-16 encoding. If the specified character 9565 * is not a 9566 * <a href="Character.html#supplementary">supplementary character</a>, 9567 * an unspecified {@code char} is returned. 9568 * 9569 * <p>If 9570 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9571 * is {@code true}, then 9572 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 9573 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 9574 * are also always {@code true}. 9575 * 9576 * @param codePoint a supplementary character (Unicode code point) 9577 * @return the trailing surrogate code unit used to represent the 9578 * character in the UTF-16 encoding 9579 * @since 1.7 9580 */ 9581 public static char lowSurrogate(int codePoint) { 9582 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 9583 } 9584 9585 /** 9586 * Converts the specified character (Unicode code point) to its 9587 * UTF-16 representation. If the specified code point is a BMP 9588 * (Basic Multilingual Plane or Plane 0) value, the same value is 9589 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 9590 * specified code point is a supplementary character, its 9591 * surrogate values are stored in {@code dst[dstIndex]} 9592 * (high-surrogate) and {@code dst[dstIndex+1]} 9593 * (low-surrogate), and 2 is returned. 9594 * 9595 * @param codePoint the character (Unicode code point) to be converted. 9596 * @param dst an array of {@code char} in which the 9597 * {@code codePoint}'s UTF-16 value is stored. 9598 * @param dstIndex the start index into the {@code dst} 9599 * array where the converted value is stored. 9600 * @return 1 if the code point is a BMP code point, 2 if the 9601 * code point is a supplementary code point. 9602 * @throws IllegalArgumentException if the specified 9603 * {@code codePoint} is not a valid Unicode code point. 9604 * @throws NullPointerException if the specified {@code dst} is null. 9605 * @throws IndexOutOfBoundsException if {@code dstIndex} 9606 * is negative or not less than {@code dst.length}, or if 9607 * {@code dst} at {@code dstIndex} doesn't have enough 9608 * array element(s) to store the resulting {@code char} 9609 * value(s). (If {@code dstIndex} is equal to 9610 * {@code dst.length-1} and the specified 9611 * {@code codePoint} is a supplementary character, the 9612 * high-surrogate value is not stored in 9613 * {@code dst[dstIndex]}.) 9614 * @since 1.5 9615 */ 9616 public static int toChars(int codePoint, char[] dst, int dstIndex) { 9617 if (isBmpCodePoint(codePoint)) { 9618 dst[dstIndex] = (char) codePoint; 9619 return 1; 9620 } else if (isValidCodePoint(codePoint)) { 9621 toSurrogates(codePoint, dst, dstIndex); 9622 return 2; 9623 } else { 9624 throw new IllegalArgumentException( 9625 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9626 } 9627 } 9628 9629 /** 9630 * Converts the specified character (Unicode code point) to its 9631 * UTF-16 representation stored in a {@code char} array. 
If 9632 * the specified code point is a BMP (Basic Multilingual Plane or 9633 * Plane 0) value, the resulting {@code char} array has 9634 * the same value as {@code codePoint}. If the specified code 9635 * point is a supplementary code point, the resulting 9636 * {@code char} array has the corresponding surrogate pair. 9637 * 9638 * @param codePoint a Unicode code point 9639 * @return a {@code char} array having 9640 * {@code codePoint}'s UTF-16 representation. 9641 * @throws IllegalArgumentException if the specified 9642 * {@code codePoint} is not a valid Unicode code point. 9643 * @since 1.5 9644 */ 9645 public static char[] toChars(int codePoint) { 9646 if (isBmpCodePoint(codePoint)) { 9647 return new char[] { (char) codePoint }; 9648 } else if (isValidCodePoint(codePoint)) { 9649 char[] result = new char[2]; 9650 toSurrogates(codePoint, result, 0); 9651 return result; 9652 } else { 9653 throw new IllegalArgumentException( 9654 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9655 } 9656 } 9657 9658 static void toSurrogates(int codePoint, char[] dst, int index) { 9659 // We write elements "backwards" to guarantee all-or-nothing 9660 dst[index+1] = lowSurrogate(codePoint); 9661 dst[index] = highSurrogate(codePoint); 9662 } 9663 9664 /** 9665 * Returns the number of Unicode code points in the text range of 9666 * the specified char sequence. The text range begins at the 9667 * specified {@code beginIndex} and extends to the 9668 * {@code char} at index {@code endIndex - 1}. Thus the 9669 * length (in {@code char}s) of the text range is 9670 * {@code endIndex-beginIndex}. Unpaired surrogates within 9671 * the text range count as one code point each. 9672 * 9673 * @param seq the char sequence 9674 * @param beginIndex the index to the first {@code char} of 9675 * the text range. 9676 * @param endIndex the index after the last {@code char} of 9677 * the text range. 
9678 * @return the number of Unicode code points in the specified text 9679 * range 9680 * @throws NullPointerException if {@code seq} is null. 9681 * @throws IndexOutOfBoundsException if the 9682 * {@code beginIndex} is negative, or {@code endIndex} 9683 * is larger than the length of the given sequence, or 9684 * {@code beginIndex} is larger than {@code endIndex}. 9685 * @since 1.5 9686 */ 9687 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 9688 Objects.checkFromToIndex(beginIndex, endIndex, seq.length()); 9689 int n = endIndex - beginIndex; 9690 for (int i = beginIndex; i < endIndex; ) { 9691 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 9692 isLowSurrogate(seq.charAt(i))) { 9693 n--; 9694 i++; 9695 } 9696 } 9697 return n; 9698 } 9699 9700 /** 9701 * Returns the number of Unicode code points in a subarray of the 9702 * {@code char} array argument. The {@code offset} 9703 * argument is the index of the first {@code char} of the 9704 * subarray and the {@code count} argument specifies the 9705 * length of the subarray in {@code char}s. Unpaired 9706 * surrogates within the subarray count as one code point each. 9707 * 9708 * @param a the {@code char} array 9709 * @param offset the index of the first {@code char} in the 9710 * given {@code char} array 9711 * @param count the length of the subarray in {@code char}s 9712 * @return the number of Unicode code points in the specified subarray 9713 * @throws NullPointerException if {@code a} is null. 9714 * @throws IndexOutOfBoundsException if {@code offset} or 9715 * {@code count} is negative, or if {@code offset + 9716 * count} is larger than the length of the given array. 
9717 * @since 1.5 9718 */ 9719 public static int codePointCount(char[] a, int offset, int count) { 9720 Objects.checkFromIndexSize(offset, count, a.length); 9721 return codePointCountImpl(a, offset, count); 9722 } 9723 9724 static int codePointCountImpl(char[] a, int offset, int count) { 9725 int endIndex = offset + count; 9726 int n = count; 9727 for (int i = offset; i < endIndex; ) { 9728 if (isHighSurrogate(a[i++]) && i < endIndex && 9729 isLowSurrogate(a[i])) { 9730 n--; 9731 i++; 9732 } 9733 } 9734 return n; 9735 } 9736 9737 /** 9738 * Returns the index within the given char sequence that is offset 9739 * from the given {@code index} by {@code codePointOffset} 9740 * code points. Unpaired surrogates within the text range given by 9741 * {@code index} and {@code codePointOffset} count as 9742 * one code point each. 9743 * 9744 * @param seq the char sequence 9745 * @param index the index to be offset 9746 * @param codePointOffset the offset in code points 9747 * @return the index within the char sequence 9748 * @throws NullPointerException if {@code seq} is null. 9749 * @throws IndexOutOfBoundsException if {@code index} 9750 * is negative or larger than the length of the char sequence, 9751 * or if {@code codePointOffset} is positive and the 9752 * subsequence starting with {@code index} has fewer than 9753 * {@code codePointOffset} code points, or if 9754 * {@code codePointOffset} is negative and the subsequence 9755 * before {@code index} has fewer than the absolute value 9756 * of {@code codePointOffset} code points. 
9757 * @since 1.5 9758 */ 9759 public static int offsetByCodePoints(CharSequence seq, int index, 9760 int codePointOffset) { 9761 int length = seq.length(); 9762 if (index < 0 || index > length) { 9763 throw new IndexOutOfBoundsException(); 9764 } 9765 9766 int x = index; 9767 if (codePointOffset >= 0) { 9768 int i; 9769 for (i = 0; x < length && i < codePointOffset; i++) { 9770 if (isHighSurrogate(seq.charAt(x++)) && x < length && 9771 isLowSurrogate(seq.charAt(x))) { 9772 x++; 9773 } 9774 } 9775 if (i < codePointOffset) { 9776 throw new IndexOutOfBoundsException(); 9777 } 9778 } else { 9779 int i; 9780 for (i = codePointOffset; x > 0 && i < 0; i++) { 9781 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 9782 isHighSurrogate(seq.charAt(x-1))) { 9783 x--; 9784 } 9785 } 9786 if (i < 0) { 9787 throw new IndexOutOfBoundsException(); 9788 } 9789 } 9790 return x; 9791 } 9792 9793 /** 9794 * Returns the index within the given {@code char} subarray 9795 * that is offset from the given {@code index} by 9796 * {@code codePointOffset} code points. The 9797 * {@code start} and {@code count} arguments specify a 9798 * subarray of the {@code char} array. Unpaired surrogates 9799 * within the text range given by {@code index} and 9800 * {@code codePointOffset} count as one code point each. 9801 * 9802 * @param a the {@code char} array 9803 * @param start the index of the first {@code char} of the 9804 * subarray 9805 * @param count the length of the subarray in {@code char}s 9806 * @param index the index to be offset 9807 * @param codePointOffset the offset in code points 9808 * @return the index within the subarray 9809 * @throws NullPointerException if {@code a} is null. 
9810 * @throws IndexOutOfBoundsException 9811 * if {@code start} or {@code count} is negative, 9812 * or if {@code start + count} is larger than the length of 9813 * the given array, 9814 * or if {@code index} is less than {@code start} or 9815 * larger then {@code start + count}, 9816 * or if {@code codePointOffset} is positive and the text range 9817 * starting with {@code index} and ending with {@code start + count - 1} 9818 * has fewer than {@code codePointOffset} code 9819 * points, 9820 * or if {@code codePointOffset} is negative and the text range 9821 * starting with {@code start} and ending with {@code index - 1} 9822 * has fewer than the absolute value of 9823 * {@code codePointOffset} code points. 9824 * @since 1.5 9825 */ 9826 public static int offsetByCodePoints(char[] a, int start, int count, 9827 int index, int codePointOffset) { 9828 if (count > a.length-start || start < 0 || count < 0 9829 || index < start || index > start+count) { 9830 throw new IndexOutOfBoundsException(); 9831 } 9832 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 9833 } 9834 9835 static int offsetByCodePointsImpl(char[]a, int start, int count, 9836 int index, int codePointOffset) { 9837 int x = index; 9838 if (codePointOffset >= 0) { 9839 int limit = start + count; 9840 int i; 9841 for (i = 0; x < limit && i < codePointOffset; i++) { 9842 if (isHighSurrogate(a[x++]) && x < limit && 9843 isLowSurrogate(a[x])) { 9844 x++; 9845 } 9846 } 9847 if (i < codePointOffset) { 9848 throw new IndexOutOfBoundsException(); 9849 } 9850 } else { 9851 int i; 9852 for (i = codePointOffset; x > start && i < 0; i++) { 9853 if (isLowSurrogate(a[--x]) && x > start && 9854 isHighSurrogate(a[x-1])) { 9855 x--; 9856 } 9857 } 9858 if (i < 0) { 9859 throw new IndexOutOfBoundsException(); 9860 } 9861 } 9862 return x; 9863 } 9864 9865 /** 9866 * Determines if the specified character is a lowercase character. 
9867 * <p> 9868 * A character is lowercase if its general category type, provided 9869 * by {@code Character.getType(ch)}, is 9870 * {@code LOWERCASE_LETTER}, or it has contributory property 9871 * Other_Lowercase as defined by the Unicode Standard. 9872 * <p> 9873 * The following are examples of lowercase characters: 9874 * <blockquote><pre> 9875 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9876 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9877 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9878 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9879 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9880 * </pre></blockquote> 9881 * <p> Many other Unicode characters are lowercase too. 9882 * 9883 * <p><b>Note:</b> This method cannot handle <a 9884 * href="#supplementary"> supplementary characters</a>. To support 9885 * all Unicode characters, including supplementary characters, use 9886 * the {@link #isLowerCase(int)} method. 9887 * 9888 * @param ch the character to be tested. 9889 * @return {@code true} if the character is lowercase; 9890 * {@code false} otherwise. 9891 * @see Character#isLowerCase(char) 9892 * @see Character#isTitleCase(char) 9893 * @see Character#toLowerCase(char) 9894 * @see Character#getType(char) 9895 */ 9896 public static boolean isLowerCase(char ch) { 9897 return isLowerCase((int)ch); 9898 } 9899 9900 /** 9901 * Determines if the specified character (Unicode code point) is a 9902 * lowercase character. 9903 * <p> 9904 * A character is lowercase if its general category type, provided 9905 * by {@link Character#getType getType(codePoint)}, is 9906 * {@code LOWERCASE_LETTER}, or it has contributory property 9907 * Other_Lowercase as defined by the Unicode Standard. 
9908 * <p> 9909 * The following are examples of lowercase characters: 9910 * <blockquote><pre> 9911 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9912 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9913 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9914 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9915 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9916 * </pre></blockquote> 9917 * <p> Many other Unicode characters are lowercase too. 9918 * 9919 * @param codePoint the character (Unicode code point) to be tested. 9920 * @return {@code true} if the character is lowercase; 9921 * {@code false} otherwise. 9922 * @see Character#isLowerCase(int) 9923 * @see Character#isTitleCase(int) 9924 * @see Character#toLowerCase(int) 9925 * @see Character#getType(int) 9926 * @since 1.5 9927 */ 9928 public static boolean isLowerCase(int codePoint) { 9929 return CharacterData.of(codePoint).isLowerCase(codePoint); 9930 } 9931 9932 /** 9933 * Determines if the specified character is an uppercase character. 9934 * <p> 9935 * A character is uppercase if its general category type, provided by 9936 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 9937 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 9938 * <p> 9939 * The following are examples of uppercase characters: 9940 * <blockquote><pre> 9941 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 9942 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 9943 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 9944 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 9945 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 9946 * </pre></blockquote> 9947 * <p> Many other Unicode characters are uppercase too. 
9948 * 9949 * <p><b>Note:</b> This method cannot handle <a 9950 * href="#supplementary"> supplementary characters</a>. To support 9951 * all Unicode characters, including supplementary characters, use 9952 * the {@link #isUpperCase(int)} method. 9953 * 9954 * @param ch the character to be tested. 9955 * @return {@code true} if the character is uppercase; 9956 * {@code false} otherwise. 9957 * @see Character#isLowerCase(char) 9958 * @see Character#isTitleCase(char) 9959 * @see Character#toUpperCase(char) 9960 * @see Character#getType(char) 9961 * @since 1.0 9962 */ 9963 public static boolean isUpperCase(char ch) { 9964 return isUpperCase((int)ch); 9965 } 9966 9967 /** 9968 * Determines if the specified character (Unicode code point) is an uppercase character. 9969 * <p> 9970 * A character is uppercase if its general category type, provided by 9971 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 9972 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 9973 * <p> 9974 * The following are examples of uppercase characters: 9975 * <blockquote><pre> 9976 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 9977 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 9978 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 9979 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 9980 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 9981 * </pre></blockquote> 9982 * <p> Many other Unicode characters are uppercase too. 9983 * 9984 * @param codePoint the character (Unicode code point) to be tested. 9985 * @return {@code true} if the character is uppercase; 9986 * {@code false} otherwise. 
9987 * @see Character#isLowerCase(int) 9988 * @see Character#isTitleCase(int) 9989 * @see Character#toUpperCase(int) 9990 * @see Character#getType(int) 9991 * @since 1.5 9992 */ 9993 public static boolean isUpperCase(int codePoint) { 9994 return CharacterData.of(codePoint).isUpperCase(codePoint); 9995 } 9996 9997 /** 9998 * Determines if the specified character is a titlecase character. 9999 * <p> 10000 * A character is a titlecase character if its general 10001 * category type, provided by {@code Character.getType(ch)}, 10002 * is {@code TITLECASE_LETTER}. 10003 * <p> 10004 * Some characters look like pairs of Latin letters. For example, there 10005 * is an uppercase letter that looks like "LJ" and has a corresponding 10006 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 10007 * is the appropriate form to use when rendering a word in lowercase 10008 * with initial capitals, as for a book title. 10009 * <p> 10010 * These are some of the Unicode characters for which this method returns 10011 * {@code true}: 10012 * <ul> 10013 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10014 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10015 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10016 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10017 * </ul> 10018 * <p> Many other Unicode characters are titlecase too. 10019 * 10020 * <p><b>Note:</b> This method cannot handle <a 10021 * href="#supplementary"> supplementary characters</a>. To support 10022 * all Unicode characters, including supplementary characters, use 10023 * the {@link #isTitleCase(int)} method. 10024 * 10025 * @param ch the character to be tested. 10026 * @return {@code true} if the character is titlecase; 10027 * {@code false} otherwise. 
10028 * @see Character#isLowerCase(char) 10029 * @see Character#isUpperCase(char) 10030 * @see Character#toTitleCase(char) 10031 * @see Character#getType(char) 10032 * @since 1.0.2 10033 */ 10034 public static boolean isTitleCase(char ch) { 10035 return isTitleCase((int)ch); 10036 } 10037 10038 /** 10039 * Determines if the specified character (Unicode code point) is a titlecase character. 10040 * <p> 10041 * A character is a titlecase character if its general 10042 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10043 * is {@code TITLECASE_LETTER}. 10044 * <p> 10045 * Some characters look like pairs of Latin letters. For example, there 10046 * is an uppercase letter that looks like "LJ" and has a corresponding 10047 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 10048 * is the appropriate form to use when rendering a word in lowercase 10049 * with initial capitals, as for a book title. 10050 * <p> 10051 * These are some of the Unicode characters for which this method returns 10052 * {@code true}: 10053 * <ul> 10054 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10055 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10056 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10057 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10058 * </ul> 10059 * <p> Many other Unicode characters are titlecase too. 10060 * 10061 * @param codePoint the character (Unicode code point) to be tested. 10062 * @return {@code true} if the character is titlecase; 10063 * {@code false} otherwise. 10064 * @see Character#isLowerCase(int) 10065 * @see Character#isUpperCase(int) 10066 * @see Character#toTitleCase(int) 10067 * @see Character#getType(int) 10068 * @since 1.5 10069 */ 10070 public static boolean isTitleCase(int codePoint) { 10071 return getType(codePoint) == Character.TITLECASE_LETTER; 10072 } 10073 10074 /** 10075 * Determines if the specified character is a digit. 
10076 * <p> 10077 * A character is a digit if its general category type, provided 10078 * by {@code Character.getType(ch)}, is 10079 * {@code DECIMAL_DIGIT_NUMBER}. 10080 * <p> 10081 * Some Unicode character ranges that contain digits: 10082 * <ul> 10083 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10084 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10085 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10086 * Arabic-Indic digits 10087 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10088 * Extended Arabic-Indic digits 10089 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10090 * Devanagari digits 10091 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10092 * Fullwidth digits 10093 * </ul> 10094 * 10095 * Many other character ranges contain digits as well. 10096 * 10097 * <p><b>Note:</b> This method cannot handle <a 10098 * href="#supplementary"> supplementary characters</a>. To support 10099 * all Unicode characters, including supplementary characters, use 10100 * the {@link #isDigit(int)} method. 10101 * 10102 * @param ch the character to be tested. 10103 * @return {@code true} if the character is a digit; 10104 * {@code false} otherwise. 10105 * @see Character#digit(char, int) 10106 * @see Character#forDigit(int, int) 10107 * @see Character#getType(char) 10108 */ 10109 public static boolean isDigit(char ch) { 10110 return isDigit((int)ch); 10111 } 10112 10113 /** 10114 * Determines if the specified character (Unicode code point) is a digit. 10115 * <p> 10116 * A character is a digit if its general category type, provided 10117 * by {@link Character#getType(int) getType(codePoint)}, is 10118 * {@code DECIMAL_DIGIT_NUMBER}. 
10119 * <p> 10120 * Some Unicode character ranges that contain digits: 10121 * <ul> 10122 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10123 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10124 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10125 * Arabic-Indic digits 10126 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10127 * Extended Arabic-Indic digits 10128 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10129 * Devanagari digits 10130 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10131 * Fullwidth digits 10132 * </ul> 10133 * 10134 * Many other character ranges contain digits as well. 10135 * 10136 * @param codePoint the character (Unicode code point) to be tested. 10137 * @return {@code true} if the character is a digit; 10138 * {@code false} otherwise. 10139 * @see Character#forDigit(int, int) 10140 * @see Character#getType(int) 10141 * @since 1.5 10142 */ 10143 public static boolean isDigit(int codePoint) { 10144 return CharacterData.of(codePoint).isDigit(codePoint); 10145 } 10146 10147 /** 10148 * Determines if a character is defined in Unicode. 10149 * <p> 10150 * A character is defined if at least one of the following is true: 10151 * <ul> 10152 * <li>It has an entry in the UnicodeData file. 10153 * <li>It has a value in a range defined by the UnicodeData file. 10154 * </ul> 10155 * 10156 * <p><b>Note:</b> This method cannot handle <a 10157 * href="#supplementary"> supplementary characters</a>. To support 10158 * all Unicode characters, including supplementary characters, use 10159 * the {@link #isDefined(int)} method. 10160 * 10161 * @param ch the character to be tested 10162 * @return {@code true} if the character has a defined meaning 10163 * in Unicode; {@code false} otherwise. 
10164 * @see Character#isDigit(char) 10165 * @see Character#isLetter(char) 10166 * @see Character#isLetterOrDigit(char) 10167 * @see Character#isLowerCase(char) 10168 * @see Character#isTitleCase(char) 10169 * @see Character#isUpperCase(char) 10170 * @since 1.0.2 10171 */ 10172 public static boolean isDefined(char ch) { 10173 return isDefined((int)ch); 10174 } 10175 10176 /** 10177 * Determines if a character (Unicode code point) is defined in Unicode. 10178 * <p> 10179 * A character is defined if at least one of the following is true: 10180 * <ul> 10181 * <li>It has an entry in the UnicodeData file. 10182 * <li>It has a value in a range defined by the UnicodeData file. 10183 * </ul> 10184 * 10185 * @param codePoint the character (Unicode code point) to be tested. 10186 * @return {@code true} if the character has a defined meaning 10187 * in Unicode; {@code false} otherwise. 10188 * @see Character#isDigit(int) 10189 * @see Character#isLetter(int) 10190 * @see Character#isLetterOrDigit(int) 10191 * @see Character#isLowerCase(int) 10192 * @see Character#isTitleCase(int) 10193 * @see Character#isUpperCase(int) 10194 * @since 1.5 10195 */ 10196 public static boolean isDefined(int codePoint) { 10197 return getType(codePoint) != Character.UNASSIGNED; 10198 } 10199 10200 /** 10201 * Determines if the specified character is a letter. 10202 * <p> 10203 * A character is considered to be a letter if its general 10204 * category type, provided by {@code Character.getType(ch)}, 10205 * is any of the following: 10206 * <ul> 10207 * <li> {@code UPPERCASE_LETTER} 10208 * <li> {@code LOWERCASE_LETTER} 10209 * <li> {@code TITLECASE_LETTER} 10210 * <li> {@code MODIFIER_LETTER} 10211 * <li> {@code OTHER_LETTER} 10212 * </ul> 10213 * 10214 * Not all letters have case. Many characters are 10215 * letters but are neither uppercase nor lowercase nor titlecase. 10216 * 10217 * <p><b>Note:</b> This method cannot handle <a 10218 * href="#supplementary"> supplementary characters</a>. 
To support 10219 * all Unicode characters, including supplementary characters, use 10220 * the {@link #isLetter(int)} method. 10221 * 10222 * @param ch the character to be tested. 10223 * @return {@code true} if the character is a letter; 10224 * {@code false} otherwise. 10225 * @see Character#isDigit(char) 10226 * @see Character#isJavaIdentifierStart(char) 10227 * @see Character#isJavaLetter(char) 10228 * @see Character#isJavaLetterOrDigit(char) 10229 * @see Character#isLetterOrDigit(char) 10230 * @see Character#isLowerCase(char) 10231 * @see Character#isTitleCase(char) 10232 * @see Character#isUnicodeIdentifierStart(char) 10233 * @see Character#isUpperCase(char) 10234 */ 10235 public static boolean isLetter(char ch) { 10236 return isLetter((int)ch); 10237 } 10238 10239 /** 10240 * Determines if the specified character (Unicode code point) is a letter. 10241 * <p> 10242 * A character is considered to be a letter if its general 10243 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10244 * is any of the following: 10245 * <ul> 10246 * <li> {@code UPPERCASE_LETTER} 10247 * <li> {@code LOWERCASE_LETTER} 10248 * <li> {@code TITLECASE_LETTER} 10249 * <li> {@code MODIFIER_LETTER} 10250 * <li> {@code OTHER_LETTER} 10251 * </ul> 10252 * 10253 * Not all letters have case. Many characters are 10254 * letters but are neither uppercase nor lowercase nor titlecase. 10255 * 10256 * @param codePoint the character (Unicode code point) to be tested. 10257 * @return {@code true} if the character is a letter; 10258 * {@code false} otherwise. 
10259 * @see Character#isDigit(int) 10260 * @see Character#isJavaIdentifierStart(int) 10261 * @see Character#isLetterOrDigit(int) 10262 * @see Character#isLowerCase(int) 10263 * @see Character#isTitleCase(int) 10264 * @see Character#isUnicodeIdentifierStart(int) 10265 * @see Character#isUpperCase(int) 10266 * @since 1.5 10267 */ 10268 public static boolean isLetter(int codePoint) { 10269 return ((((1 << Character.UPPERCASE_LETTER) | 10270 (1 << Character.LOWERCASE_LETTER) | 10271 (1 << Character.TITLECASE_LETTER) | 10272 (1 << Character.MODIFIER_LETTER) | 10273 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 10274 != 0; 10275 } 10276 10277 /** 10278 * Determines if the specified character is a letter or digit. 10279 * <p> 10280 * A character is considered to be a letter or digit if either 10281 * {@code Character.isLetter(char ch)} or 10282 * {@code Character.isDigit(char ch)} returns 10283 * {@code true} for the character. 10284 * 10285 * <p><b>Note:</b> This method cannot handle <a 10286 * href="#supplementary"> supplementary characters</a>. To support 10287 * all Unicode characters, including supplementary characters, use 10288 * the {@link #isLetterOrDigit(int)} method. 10289 * 10290 * @param ch the character to be tested. 10291 * @return {@code true} if the character is a letter or digit; 10292 * {@code false} otherwise. 10293 * @see Character#isDigit(char) 10294 * @see Character#isJavaIdentifierPart(char) 10295 * @see Character#isJavaLetter(char) 10296 * @see Character#isJavaLetterOrDigit(char) 10297 * @see Character#isLetter(char) 10298 * @see Character#isUnicodeIdentifierPart(char) 10299 * @since 1.0.2 10300 */ 10301 public static boolean isLetterOrDigit(char ch) { 10302 return isLetterOrDigit((int)ch); 10303 } 10304 10305 /** 10306 * Determines if the specified character (Unicode code point) is a letter or digit. 
10307 * <p> 10308 * A character is considered to be a letter or digit if either 10309 * {@link #isLetter(int) isLetter(codePoint)} or 10310 * {@link #isDigit(int) isDigit(codePoint)} returns 10311 * {@code true} for the character. 10312 * 10313 * @param codePoint the character (Unicode code point) to be tested. 10314 * @return {@code true} if the character is a letter or digit; 10315 * {@code false} otherwise. 10316 * @see Character#isDigit(int) 10317 * @see Character#isJavaIdentifierPart(int) 10318 * @see Character#isLetter(int) 10319 * @see Character#isUnicodeIdentifierPart(int) 10320 * @since 1.5 10321 */ 10322 public static boolean isLetterOrDigit(int codePoint) { 10323 return ((((1 << Character.UPPERCASE_LETTER) | 10324 (1 << Character.LOWERCASE_LETTER) | 10325 (1 << Character.TITLECASE_LETTER) | 10326 (1 << Character.MODIFIER_LETTER) | 10327 (1 << Character.OTHER_LETTER) | 10328 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 10329 != 0; 10330 } 10331 10332 /** 10333 * Determines if the specified character is permissible as the first 10334 * character in a Java identifier. 10335 * <p> 10336 * A character may start a Java identifier if and only if 10337 * one of the following conditions is true: 10338 * <ul> 10339 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10340 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10341 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10342 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10343 * </ul> 10344 * 10345 * @param ch the character to be tested. 10346 * @return {@code true} if the character may start a Java 10347 * identifier; {@code false} otherwise. 
10348 * @see Character#isJavaLetterOrDigit(char) 10349 * @see Character#isJavaIdentifierStart(char) 10350 * @see Character#isJavaIdentifierPart(char) 10351 * @see Character#isLetter(char) 10352 * @see Character#isLetterOrDigit(char) 10353 * @see Character#isUnicodeIdentifierStart(char) 10354 * @since 1.0.2 10355 * @deprecated Replaced by isJavaIdentifierStart(char). 10356 */ 10357 @Deprecated(since="1.1") 10358 public static boolean isJavaLetter(char ch) { 10359 return isJavaIdentifierStart(ch); 10360 } 10361 10362 /** 10363 * Determines if the specified character may be part of a Java 10364 * identifier as other than the first character. 10365 * <p> 10366 * A character may be part of a Java identifier if and only if one 10367 * of the following conditions is true: 10368 * <ul> 10369 * <li> it is a letter 10370 * <li> it is a currency symbol (such as {@code '$'}) 10371 * <li> it is a connecting punctuation character (such as {@code '_'}) 10372 * <li> it is a digit 10373 * <li> it is a numeric letter (such as a Roman numeral character) 10374 * <li> it is a combining mark 10375 * <li> it is a non-spacing mark 10376 * <li> {@code isIdentifierIgnorable} returns 10377 * {@code true} for the character. 10378 * </ul> 10379 * 10380 * @param ch the character to be tested. 10381 * @return {@code true} if the character may be part of a 10382 * Java identifier; {@code false} otherwise. 10383 * @see Character#isJavaLetter(char) 10384 * @see Character#isJavaIdentifierStart(char) 10385 * @see Character#isJavaIdentifierPart(char) 10386 * @see Character#isLetter(char) 10387 * @see Character#isLetterOrDigit(char) 10388 * @see Character#isUnicodeIdentifierPart(char) 10389 * @see Character#isIdentifierIgnorable(char) 10390 * @since 1.0.2 10391 * @deprecated Replaced by isJavaIdentifierPart(char). 
10392 */ 10393 @Deprecated(since="1.1") 10394 public static boolean isJavaLetterOrDigit(char ch) { 10395 return isJavaIdentifierPart(ch); 10396 } 10397 10398 /** 10399 * Determines if the specified character (Unicode code point) is alphabetic. 10400 * <p> 10401 * A character is considered to be alphabetic if its general category type, 10402 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 10403 * the following: 10404 * <ul> 10405 * <li> {@code UPPERCASE_LETTER} 10406 * <li> {@code LOWERCASE_LETTER} 10407 * <li> {@code TITLECASE_LETTER} 10408 * <li> {@code MODIFIER_LETTER} 10409 * <li> {@code OTHER_LETTER} 10410 * <li> {@code LETTER_NUMBER} 10411 * </ul> 10412 * or it has contributory property Other_Alphabetic as defined by the 10413 * Unicode Standard. 10414 * 10415 * @param codePoint the character (Unicode code point) to be tested. 10416 * @return {@code true} if the character is a Unicode alphabet 10417 * character, {@code false} otherwise. 10418 * @since 1.7 10419 */ 10420 public static boolean isAlphabetic(int codePoint) { 10421 return (((((1 << Character.UPPERCASE_LETTER) | 10422 (1 << Character.LOWERCASE_LETTER) | 10423 (1 << Character.TITLECASE_LETTER) | 10424 (1 << Character.MODIFIER_LETTER) | 10425 (1 << Character.OTHER_LETTER) | 10426 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 10427 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 10428 } 10429 10430 /** 10431 * Determines if the specified character (Unicode code point) is a CJKV 10432 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 10433 * the Unicode Standard. 10434 * 10435 * @param codePoint the character (Unicode code point) to be tested. 10436 * @return {@code true} if the character is a Unicode ideograph 10437 * character, {@code false} otherwise. 
10438 * @since 1.7 10439 */ 10440 public static boolean isIdeographic(int codePoint) { 10441 return CharacterData.of(codePoint).isIdeographic(codePoint); 10442 } 10443 10444 /** 10445 * Determines if the specified character is 10446 * permissible as the first character in a Java identifier. 10447 * <p> 10448 * A character may start a Java identifier if and only if 10449 * one of the following conditions is true: 10450 * <ul> 10451 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10452 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10453 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10454 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10455 * </ul> 10456 * 10457 * <p><b>Note:</b> This method cannot handle <a 10458 * href="#supplementary"> supplementary characters</a>. To support 10459 * all Unicode characters, including supplementary characters, use 10460 * the {@link #isJavaIdentifierStart(int)} method. 10461 * 10462 * @param ch the character to be tested. 10463 * @return {@code true} if the character may start a Java identifier; 10464 * {@code false} otherwise. 10465 * @see Character#isJavaIdentifierPart(char) 10466 * @see Character#isLetter(char) 10467 * @see Character#isUnicodeIdentifierStart(char) 10468 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10469 * @since 1.1 10470 */ 10471 @SuppressWarnings("doclint:reference") // cross-module links 10472 public static boolean isJavaIdentifierStart(char ch) { 10473 return isJavaIdentifierStart((int)ch); 10474 } 10475 10476 /** 10477 * Determines if the character (Unicode code point) is 10478 * permissible as the first character in a Java identifier. 
10479 * <p> 10480 * A character may start a Java identifier if and only if 10481 * one of the following conditions is true: 10482 * <ul> 10483 * <li> {@link #isLetter(int) isLetter(codePoint)} 10484 * returns {@code true} 10485 * <li> {@link #getType(int) getType(codePoint)} 10486 * returns {@code LETTER_NUMBER} 10487 * <li> the referenced character is a currency symbol (such as {@code '$'}) 10488 * <li> the referenced character is a connecting punctuation character 10489 * (such as {@code '_'}). 10490 * </ul> 10491 * 10492 * @param codePoint the character (Unicode code point) to be tested. 10493 * @return {@code true} if the character may start a Java identifier; 10494 * {@code false} otherwise. 10495 * @see Character#isJavaIdentifierPart(int) 10496 * @see Character#isLetter(int) 10497 * @see Character#isUnicodeIdentifierStart(int) 10498 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10499 * @since 1.5 10500 */ 10501 @SuppressWarnings("doclint:reference") // cross-module links 10502 public static boolean isJavaIdentifierStart(int codePoint) { 10503 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 10504 } 10505 10506 /** 10507 * Determines if the specified character may be part of a Java 10508 * identifier as other than the first character. 
10509 * <p> 10510 * A character may be part of a Java identifier if any of the following 10511 * conditions are true: 10512 * <ul> 10513 * <li> it is a letter 10514 * <li> it is a currency symbol (such as {@code '$'}) 10515 * <li> it is a connecting punctuation character (such as {@code '_'}) 10516 * <li> it is a digit 10517 * <li> it is a numeric letter (such as a Roman numeral character) 10518 * <li> it is a combining mark 10519 * <li> it is a non-spacing mark 10520 * <li> {@code isIdentifierIgnorable} returns 10521 * {@code true} for the character 10522 * </ul> 10523 * 10524 * <p><b>Note:</b> This method cannot handle <a 10525 * href="#supplementary"> supplementary characters</a>. To support 10526 * all Unicode characters, including supplementary characters, use 10527 * the {@link #isJavaIdentifierPart(int)} method. 10528 * 10529 * @param ch the character to be tested. 10530 * @return {@code true} if the character may be part of a 10531 * Java identifier; {@code false} otherwise. 10532 * @see Character#isIdentifierIgnorable(char) 10533 * @see Character#isJavaIdentifierStart(char) 10534 * @see Character#isLetterOrDigit(char) 10535 * @see Character#isUnicodeIdentifierPart(char) 10536 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10537 * @since 1.1 10538 */ 10539 @SuppressWarnings("doclint:reference") // cross-module links 10540 public static boolean isJavaIdentifierPart(char ch) { 10541 return isJavaIdentifierPart((int)ch); 10542 } 10543 10544 /** 10545 * Determines if the character (Unicode code point) may be part of a Java 10546 * identifier as other than the first character. 
10547 * <p> 10548 * A character may be part of a Java identifier if any of the following 10549 * conditions are true: 10550 * <ul> 10551 * <li> it is a letter 10552 * <li> it is a currency symbol (such as {@code '$'}) 10553 * <li> it is a connecting punctuation character (such as {@code '_'}) 10554 * <li> it is a digit 10555 * <li> it is a numeric letter (such as a Roman numeral character) 10556 * <li> it is a combining mark 10557 * <li> it is a non-spacing mark 10558 * <li> {@link #isIdentifierIgnorable(int) 10559 * isIdentifierIgnorable(codePoint)} returns {@code true} for 10560 * the code point 10561 * </ul> 10562 * 10563 * @param codePoint the character (Unicode code point) to be tested. 10564 * @return {@code true} if the character may be part of a 10565 * Java identifier; {@code false} otherwise. 10566 * @see Character#isIdentifierIgnorable(int) 10567 * @see Character#isJavaIdentifierStart(int) 10568 * @see Character#isLetterOrDigit(int) 10569 * @see Character#isUnicodeIdentifierPart(int) 10570 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10571 * @since 1.5 10572 */ 10573 @SuppressWarnings("doclint:reference") // cross-module links 10574 public static boolean isJavaIdentifierPart(int codePoint) { 10575 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 10576 } 10577 10578 /** 10579 * Determines if the specified character is permissible as the 10580 * first character in a Unicode identifier. 10581 * <p> 10582 * A character may start a Unicode identifier if and only if 10583 * one of the following conditions is true: 10584 * <ul> 10585 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10586 * <li> {@link #getType(char) getType(ch)} returns 10587 * {@code LETTER_NUMBER}. 10588 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10589 * {@code Other_ID_Start}</a> character. 
10590 * </ul> 10591 * <p> 10592 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10593 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10594 * with the following profile of UAX31: 10595 * <pre> 10596 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10597 * </pre> 10598 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10599 * compatibility. 10600 * 10601 * <p><b>Note:</b> This method cannot handle <a 10602 * href="#supplementary"> supplementary characters</a>. To support 10603 * all Unicode characters, including supplementary characters, use 10604 * the {@link #isUnicodeIdentifierStart(int)} method. 10605 * 10606 * @param ch the character to be tested. 10607 * @return {@code true} if the character may start a Unicode 10608 * identifier; {@code false} otherwise. 10609 * 10610 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10611 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10612 * @see Character#isJavaIdentifierStart(char) 10613 * @see Character#isLetter(char) 10614 * @see Character#isUnicodeIdentifierPart(char) 10615 * @since 1.1 10616 */ 10617 public static boolean isUnicodeIdentifierStart(char ch) { 10618 return isUnicodeIdentifierStart((int)ch); 10619 } 10620 10621 /** 10622 * Determines if the specified character (Unicode code point) is permissible as the 10623 * first character in a Unicode identifier. 10624 * <p> 10625 * A character may start a Unicode identifier if and only if 10626 * one of the following conditions is true: 10627 * <ul> 10628 * <li> {@link #isLetter(int) isLetter(codePoint)} 10629 * returns {@code true} 10630 * <li> {@link #getType(int) getType(codePoint)} 10631 * returns {@code LETTER_NUMBER}. 10632 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10633 * {@code Other_ID_Start}</a> character. 
10634 * </ul> 10635 * <p> 10636 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10637 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10638 * with the following profile of UAX31: 10639 * <pre> 10640 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10641 * </pre> 10642 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10643 * compatibility. 10644 * 10645 * @param codePoint the character (Unicode code point) to be tested. 10646 * @return {@code true} if the character may start a Unicode 10647 * identifier; {@code false} otherwise. 10648 * 10649 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10650 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10651 * @see Character#isJavaIdentifierStart(int) 10652 * @see Character#isLetter(int) 10653 * @see Character#isUnicodeIdentifierPart(int) 10654 * @since 1.5 10655 */ 10656 public static boolean isUnicodeIdentifierStart(int codePoint) { 10657 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 10658 } 10659 10660 /** 10661 * Determines if the specified character may be part of a Unicode 10662 * identifier as other than the first character. 10663 * <p> 10664 * A character may be part of a Unicode identifier if and only if 10665 * one of the following statements is true: 10666 * <ul> 10667 * <li> it is a letter 10668 * <li> it is a connecting punctuation character (such as {@code '_'}) 10669 * <li> it is a digit 10670 * <li> it is a numeric letter (such as a Roman numeral character) 10671 * <li> it is a combining mark 10672 * <li> it is a non-spacing mark 10673 * <li> {@code isIdentifierIgnorable} returns 10674 * {@code true} for this character. 10675 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10676 * {@code Other_ID_Start}</a> character. 
10677 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10678 * {@code Other_ID_Continue}</a> character. 10679 * </ul> 10680 * <p> 10681 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10682 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10683 * with the following profile of UAX31: 10684 * <pre> 10685 * Continue := Start + ID_Continue + ignorable 10686 * Medial := empty 10687 * ignorable := isIdentifierIgnorable(char) returns true for the character 10688 * </pre> 10689 * {@code ignorable} is added to {@code Continue} for backward 10690 * compatibility. 10691 * 10692 * <p><b>Note:</b> This method cannot handle <a 10693 * href="#supplementary"> supplementary characters</a>. To support 10694 * all Unicode characters, including supplementary characters, use 10695 * the {@link #isUnicodeIdentifierPart(int)} method. 10696 * 10697 * @param ch the character to be tested. 10698 * @return {@code true} if the character may be part of a 10699 * Unicode identifier; {@code false} otherwise. 10700 * 10701 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10702 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10703 * @see Character#isIdentifierIgnorable(char) 10704 * @see Character#isJavaIdentifierPart(char) 10705 * @see Character#isLetterOrDigit(char) 10706 * @see Character#isUnicodeIdentifierStart(char) 10707 * @since 1.1 10708 */ 10709 public static boolean isUnicodeIdentifierPart(char ch) { 10710 return isUnicodeIdentifierPart((int)ch); 10711 } 10712 10713 /** 10714 * Determines if the specified character (Unicode code point) may be part of a Unicode 10715 * identifier as other than the first character. 
* <p>
 * A character may be part of a Unicode identifier if and only if
 * one of the following statements is true:
 * <ul>
 * <li> it is a letter
 * <li> it is a connecting punctuation character (such as {@code '_'})
 * <li> it is a digit
 * <li> it is a numeric letter (such as a Roman numeral character)
 * <li> it is a combining mark
 * <li> it is a non-spacing mark
 * <li> {@code isIdentifierIgnorable} returns
 *      {@code true} for this character.
 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
 *      {@code Other_ID_Start}</a> character.
 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
 *      {@code Other_ID_Continue}</a> character.
 * </ul>
 * <p>
 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
 * with the following profile of UAX31:
 * <pre>
 *     Continue  := Start + ID_Continue + ignorable
 *     Medial    := empty
 *     ignorable := isIdentifierIgnorable(int) returns true for the character
 * </pre>
 * {@code ignorable} is added to {@code Continue} for backward
 * compatibility.
 *
 * @param codePoint the character (Unicode code point) to be tested.
 * @return {@code true} if the character may be part of a
 *         Unicode identifier; {@code false} otherwise.
10748 * 10749 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10750 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10751 * @see Character#isIdentifierIgnorable(int) 10752 * @see Character#isJavaIdentifierPart(int) 10753 * @see Character#isLetterOrDigit(int) 10754 * @see Character#isUnicodeIdentifierStart(int) 10755 * @since 1.5 10756 */ 10757 public static boolean isUnicodeIdentifierPart(int codePoint) { 10758 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 10759 } 10760 10761 /** 10762 * Determines if the specified character should be regarded as 10763 * an ignorable character in a Java identifier or a Unicode identifier. 10764 * <p> 10765 * The following Unicode characters are ignorable in a Java identifier 10766 * or a Unicode identifier: 10767 * <ul> 10768 * <li>ISO control characters that are not whitespace 10769 * <ul> 10770 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 10771 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 10772 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 10773 * </ul> 10774 * 10775 * <li>all characters that have the {@code FORMAT} general 10776 * category value 10777 * </ul> 10778 * 10779 * <p><b>Note:</b> This method cannot handle <a 10780 * href="#supplementary"> supplementary characters</a>. To support 10781 * all Unicode characters, including supplementary characters, use 10782 * the {@link #isIdentifierIgnorable(int)} method. 10783 * 10784 * @param ch the character to be tested. 10785 * @return {@code true} if the character is an ignorable control 10786 * character that may be part of a Java or Unicode identifier; 10787 * {@code false} otherwise. 
10788 * @see Character#isJavaIdentifierPart(char) 10789 * @see Character#isUnicodeIdentifierPart(char) 10790 * @since 1.1 10791 */ 10792 public static boolean isIdentifierIgnorable(char ch) { 10793 return isIdentifierIgnorable((int)ch); 10794 } 10795 10796 /** 10797 * Determines if the specified character (Unicode code point) should be regarded as 10798 * an ignorable character in a Java identifier or a Unicode identifier. 10799 * <p> 10800 * The following Unicode characters are ignorable in a Java identifier 10801 * or a Unicode identifier: 10802 * <ul> 10803 * <li>ISO control characters that are not whitespace 10804 * <ul> 10805 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 10806 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 10807 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 10808 * </ul> 10809 * 10810 * <li>all characters that have the {@code FORMAT} general 10811 * category value 10812 * </ul> 10813 * 10814 * @param codePoint the character (Unicode code point) to be tested. 10815 * @return {@code true} if the character is an ignorable control 10816 * character that may be part of a Java or Unicode identifier; 10817 * {@code false} otherwise. 10818 * @see Character#isJavaIdentifierPart(int) 10819 * @see Character#isUnicodeIdentifierPart(int) 10820 * @since 1.5 10821 */ 10822 public static boolean isIdentifierIgnorable(int codePoint) { 10823 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 10824 } 10825 10826 /** 10827 * Determines if the specified character (Unicode code point) is an Emoji. 10828 * <p> 10829 * A character is considered to be an Emoji if and only if it has the {@code Emoji} 10830 * property, defined in 10831 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10832 * Unicode Emoji (Technical Standard #51)</a>. 10833 * 10834 * @param codePoint the character (Unicode code point) to be tested. 
10835 * @return {@code true} if the character is an Emoji; 10836 * {@code false} otherwise. 10837 * @since 21 10838 */ 10839 public static boolean isEmoji(int codePoint) { 10840 return CharacterData.of(codePoint).isEmoji(codePoint); 10841 } 10842 10843 /** 10844 * Determines if the specified character (Unicode code point) has the 10845 * Emoji Presentation property by default. 10846 * <p> 10847 * A character is considered to have the Emoji Presentation property if and 10848 * only if it has the {@code Emoji_Presentation} property, defined in 10849 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10850 * Unicode Emoji (Technical Standard #51)</a>. 10851 * 10852 * @param codePoint the character (Unicode code point) to be tested. 10853 * @return {@code true} if the character has the Emoji Presentation 10854 * property; {@code false} otherwise. 10855 * @since 21 10856 */ 10857 public static boolean isEmojiPresentation(int codePoint) { 10858 return CharacterData.of(codePoint).isEmojiPresentation(codePoint); 10859 } 10860 10861 /** 10862 * Determines if the specified character (Unicode code point) is an 10863 * Emoji Modifier. 10864 * <p> 10865 * A character is considered to be an Emoji Modifier if and only if it has 10866 * the {@code Emoji_Modifier} property, defined in 10867 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10868 * Unicode Emoji (Technical Standard #51)</a>. 10869 * 10870 * @param codePoint the character (Unicode code point) to be tested. 10871 * @return {@code true} if the character is an Emoji Modifier; 10872 * {@code false} otherwise. 10873 * @since 21 10874 */ 10875 public static boolean isEmojiModifier(int codePoint) { 10876 return CharacterData.of(codePoint).isEmojiModifier(codePoint); 10877 } 10878 10879 /** 10880 * Determines if the specified character (Unicode code point) is an 10881 * Emoji Modifier Base. 
10882 * <p> 10883 * A character is considered to be an Emoji Modifier Base if and only if it has 10884 * the {@code Emoji_Modifier_Base} property, defined in 10885 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10886 * Unicode Emoji (Technical Standard #51)</a>. 10887 * 10888 * @param codePoint the character (Unicode code point) to be tested. 10889 * @return {@code true} if the character is an Emoji Modifier Base; 10890 * {@code false} otherwise. 10891 * @since 21 10892 */ 10893 public static boolean isEmojiModifierBase(int codePoint) { 10894 return CharacterData.of(codePoint).isEmojiModifierBase(codePoint); 10895 } 10896 10897 /** 10898 * Determines if the specified character (Unicode code point) is an 10899 * Emoji Component. 10900 * <p> 10901 * A character is considered to be an Emoji Component if and only if it has 10902 * the {@code Emoji_Component} property, defined in 10903 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10904 * Unicode Emoji (Technical Standard #51)</a>. 10905 * 10906 * @param codePoint the character (Unicode code point) to be tested. 10907 * @return {@code true} if the character is an Emoji Component; 10908 * {@code false} otherwise. 10909 * @since 21 10910 */ 10911 public static boolean isEmojiComponent(int codePoint) { 10912 return CharacterData.of(codePoint).isEmojiComponent(codePoint); 10913 } 10914 10915 /** 10916 * Determines if the specified character (Unicode code point) is 10917 * an Extended Pictographic. 10918 * <p> 10919 * A character is considered to be an Extended Pictographic if and only if it has 10920 * the {@code Extended_Pictographic} property, defined in 10921 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 10922 * Unicode Emoji (Technical Standard #51)</a>. 10923 * 10924 * @param codePoint the character (Unicode code point) to be tested. 
10925 * @return {@code true} if the character is an Extended Pictographic; 10926 * {@code false} otherwise. 10927 * @since 21 10928 */ 10929 public static boolean isExtendedPictographic(int codePoint) { 10930 return CharacterData.of(codePoint).isExtendedPictographic(codePoint); 10931 } 10932 10933 /** 10934 * Converts the character argument to lowercase using case 10935 * mapping information from the UnicodeData file. 10936 * <p> 10937 * Note that 10938 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 10939 * does not always return {@code true} for some ranges of 10940 * characters, particularly those that are symbols or ideographs. 10941 * 10942 * <p>In general, {@link String#toLowerCase()} should be used to map 10943 * characters to lowercase. {@code String} case mapping methods 10944 * have several benefits over {@code Character} case mapping methods. 10945 * {@code String} case mapping methods can perform locale-sensitive 10946 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10947 * the {@code Character} case mapping methods cannot. 10948 * 10949 * <p><b>Note:</b> This method cannot handle <a 10950 * href="#supplementary"> supplementary characters</a>. To support 10951 * all Unicode characters, including supplementary characters, use 10952 * the {@link #toLowerCase(int)} method. 10953 * 10954 * @param ch the character to be converted. 10955 * @return the lowercase equivalent of the character, if any; 10956 * otherwise, the character itself. 10957 * @see Character#isLowerCase(char) 10958 * @see String#toLowerCase() 10959 */ 10960 public static char toLowerCase(char ch) { 10961 return (char)toLowerCase((int)ch); 10962 } 10963 10964 /** 10965 * Converts the character (Unicode code point) argument to 10966 * lowercase using case mapping information from the UnicodeData 10967 * file. 
10968 * 10969 * <p> Note that 10970 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 10971 * does not always return {@code true} for some ranges of 10972 * characters, particularly those that are symbols or ideographs. 10973 * 10974 * <p>In general, {@link String#toLowerCase()} should be used to map 10975 * characters to lowercase. {@code String} case mapping methods 10976 * have several benefits over {@code Character} case mapping methods. 10977 * {@code String} case mapping methods can perform locale-sensitive 10978 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10979 * the {@code Character} case mapping methods cannot. 10980 * 10981 * @param codePoint the character (Unicode code point) to be converted. 10982 * @return the lowercase equivalent of the character (Unicode code 10983 * point), if any; otherwise, the character itself. 10984 * @see Character#isLowerCase(int) 10985 * @see String#toLowerCase() 10986 * 10987 * @since 1.5 10988 */ 10989 public static int toLowerCase(int codePoint) { 10990 return CharacterData.of(codePoint).toLowerCase(codePoint); 10991 } 10992 10993 /** 10994 * Converts the character argument to uppercase using case mapping 10995 * information from the UnicodeData file. 10996 * <p> 10997 * Note that 10998 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 10999 * does not always return {@code true} for some ranges of 11000 * characters, particularly those that are symbols or ideographs. 11001 * 11002 * <p>In general, {@link String#toUpperCase()} should be used to map 11003 * characters to uppercase. {@code String} case mapping methods 11004 * have several benefits over {@code Character} case mapping methods. 11005 * {@code String} case mapping methods can perform locale-sensitive 11006 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11007 * the {@code Character} case mapping methods cannot. 
11008 * 11009 * <p><b>Note:</b> This method cannot handle <a 11010 * href="#supplementary"> supplementary characters</a>. To support 11011 * all Unicode characters, including supplementary characters, use 11012 * the {@link #toUpperCase(int)} method. 11013 * 11014 * @param ch the character to be converted. 11015 * @return the uppercase equivalent of the character, if any; 11016 * otherwise, the character itself. 11017 * @see Character#isUpperCase(char) 11018 * @see String#toUpperCase() 11019 */ 11020 public static char toUpperCase(char ch) { 11021 return (char)toUpperCase((int)ch); 11022 } 11023 11024 /** 11025 * Converts the character (Unicode code point) argument to 11026 * uppercase using case mapping information from the UnicodeData 11027 * file. 11028 * 11029 * <p>Note that 11030 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 11031 * does not always return {@code true} for some ranges of 11032 * characters, particularly those that are symbols or ideographs. 11033 * 11034 * <p>In general, {@link String#toUpperCase()} should be used to map 11035 * characters to uppercase. {@code String} case mapping methods 11036 * have several benefits over {@code Character} case mapping methods. 11037 * {@code String} case mapping methods can perform locale-sensitive 11038 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11039 * the {@code Character} case mapping methods cannot. 11040 * 11041 * @param codePoint the character (Unicode code point) to be converted. 11042 * @return the uppercase equivalent of the character, if any; 11043 * otherwise, the character itself. 11044 * @see Character#isUpperCase(int) 11045 * @see String#toUpperCase() 11046 * 11047 * @since 1.5 11048 */ 11049 public static int toUpperCase(int codePoint) { 11050 return CharacterData.of(codePoint).toUpperCase(codePoint); 11051 } 11052 11053 /** 11054 * Converts the character argument to titlecase using case mapping 11055 * information from the UnicodeData file. 
If a character has no 11056 * explicit titlecase mapping and is not itself a titlecase char 11057 * according to UnicodeData, then the uppercase mapping is 11058 * returned as an equivalent titlecase mapping. If the 11059 * {@code char} argument is already a titlecase 11060 * {@code char}, the same {@code char} value will be 11061 * returned. 11062 * <p> 11063 * Note that 11064 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 11065 * does not always return {@code true} for some ranges of 11066 * characters. 11067 * 11068 * <p><b>Note:</b> This method cannot handle <a 11069 * href="#supplementary"> supplementary characters</a>. To support 11070 * all Unicode characters, including supplementary characters, use 11071 * the {@link #toTitleCase(int)} method. 11072 * 11073 * @param ch the character to be converted. 11074 * @return the titlecase equivalent of the character, if any; 11075 * otherwise, the character itself. 11076 * @see Character#isTitleCase(char) 11077 * @see Character#toLowerCase(char) 11078 * @see Character#toUpperCase(char) 11079 * @since 1.0.2 11080 */ 11081 public static char toTitleCase(char ch) { 11082 return (char)toTitleCase((int)ch); 11083 } 11084 11085 /** 11086 * Converts the character (Unicode code point) argument to titlecase using case mapping 11087 * information from the UnicodeData file. If a character has no 11088 * explicit titlecase mapping and is not itself a titlecase char 11089 * according to UnicodeData, then the uppercase mapping is 11090 * returned as an equivalent titlecase mapping. If the 11091 * character argument is already a titlecase 11092 * character, the same character value will be 11093 * returned. 11094 * 11095 * <p>Note that 11096 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 11097 * does not always return {@code true} for some ranges of 11098 * characters. 11099 * 11100 * @param codePoint the character (Unicode code point) to be converted. 
11101 * @return the titlecase equivalent of the character, if any; 11102 * otherwise, the character itself. 11103 * @see Character#isTitleCase(int) 11104 * @see Character#toLowerCase(int) 11105 * @see Character#toUpperCase(int) 11106 * @since 1.5 11107 */ 11108 public static int toTitleCase(int codePoint) { 11109 return CharacterData.of(codePoint).toTitleCase(codePoint); 11110 } 11111 11112 /** 11113 * Returns the numeric value of the character {@code ch} in the 11114 * specified radix. 11115 * <p> 11116 * If the radix is not in the range {@code MIN_RADIX} ≤ 11117 * {@code radix} ≤ {@code MAX_RADIX} or if the 11118 * value of {@code ch} is not a valid digit in the specified 11119 * radix, {@code -1} is returned. A character is a valid digit 11120 * if at least one of the following is true: 11121 * <ul> 11122 * <li>The method {@code isDigit} is {@code true} of the character 11123 * and the Unicode decimal digit value of the character (or its 11124 * single-character decomposition) is less than the specified radix. 11125 * In this case the decimal digit value is returned. 11126 * <li>The character is one of the uppercase Latin letters 11127 * {@code 'A'} through {@code 'Z'} and its code is less than 11128 * {@code radix + 'A' - 10}. 11129 * In this case, {@code ch - 'A' + 10} 11130 * is returned. 11131 * <li>The character is one of the lowercase Latin letters 11132 * {@code 'a'} through {@code 'z'} and its code is less than 11133 * {@code radix + 'a' - 10}. 11134 * In this case, {@code ch - 'a' + 10} 11135 * is returned. 11136 * <li>The character is one of the fullwidth uppercase Latin letters A 11137 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11138 * and its code is less than 11139 * {@code radix + '\u005CuFF21' - 10}. 11140 * In this case, {@code ch - '\u005CuFF21' + 10} 11141 * is returned. 
11142 * <li>The character is one of the fullwidth lowercase Latin letters a 11143 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11144 * and its code is less than 11145 * {@code radix + '\u005CuFF41' - 10}. 11146 * In this case, {@code ch - '\u005CuFF41' + 10} 11147 * is returned. 11148 * </ul> 11149 * 11150 * <p><b>Note:</b> This method cannot handle <a 11151 * href="#supplementary"> supplementary characters</a>. To support 11152 * all Unicode characters, including supplementary characters, use 11153 * the {@link #digit(int, int)} method. 11154 * 11155 * @param ch the character to be converted. 11156 * @param radix the radix. 11157 * @return the numeric value represented by the character in the 11158 * specified radix. 11159 * @see Character#forDigit(int, int) 11160 * @see Character#isDigit(char) 11161 */ 11162 public static int digit(char ch, int radix) { 11163 return digit((int)ch, radix); 11164 } 11165 11166 /** 11167 * Returns the numeric value of the specified character (Unicode 11168 * code point) in the specified radix. 11169 * 11170 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 11171 * {@code radix} ≤ {@code MAX_RADIX} or if the 11172 * character is not a valid digit in the specified 11173 * radix, {@code -1} is returned. A character is a valid digit 11174 * if at least one of the following is true: 11175 * <ul> 11176 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 11177 * and the Unicode decimal digit value of the character (or its 11178 * single-character decomposition) is less than the specified radix. 11179 * In this case the decimal digit value is returned. 11180 * <li>The character is one of the uppercase Latin letters 11181 * {@code 'A'} through {@code 'Z'} and its code is less than 11182 * {@code radix + 'A' - 10}. 11183 * In this case, {@code codePoint - 'A' + 10} 11184 * is returned. 
11185 * <li>The character is one of the lowercase Latin letters 11186 * {@code 'a'} through {@code 'z'} and its code is less than 11187 * {@code radix + 'a' - 10}. 11188 * In this case, {@code codePoint - 'a' + 10} 11189 * is returned. 11190 * <li>The character is one of the fullwidth uppercase Latin letters A 11191 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11192 * and its code is less than 11193 * {@code radix + '\u005CuFF21' - 10}. 11194 * In this case, 11195 * {@code codePoint - '\u005CuFF21' + 10} 11196 * is returned. 11197 * <li>The character is one of the fullwidth lowercase Latin letters a 11198 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11199 * and its code is less than 11200 * {@code radix + '\u005CuFF41'- 10}. 11201 * In this case, 11202 * {@code codePoint - '\u005CuFF41' + 10} 11203 * is returned. 11204 * </ul> 11205 * 11206 * @param codePoint the character (Unicode code point) to be converted. 11207 * @param radix the radix. 11208 * @return the numeric value represented by the character in the 11209 * specified radix. 11210 * @see Character#forDigit(int, int) 11211 * @see Character#isDigit(int) 11212 * @since 1.5 11213 */ 11214 public static int digit(int codePoint, int radix) { 11215 return CharacterData.of(codePoint).digit(codePoint, radix); 11216 } 11217 11218 /** 11219 * Returns the {@code int} value that the specified Unicode 11220 * character represents. For example, the character 11221 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 11222 * an int with a value of 50. 11223 * <p> 11224 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11225 * {@code '\u005Cu005A'}), lowercase 11226 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11227 * full width variant ({@code '\u005CuFF21'} through 11228 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11229 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11230 * through 35. 
This is independent of the Unicode specification, 11231 * which does not assign numeric values to these {@code char} 11232 * values. 11233 * <p> 11234 * If the character does not have a numeric value, then -1 is returned. 11235 * If the character has a numeric value that cannot be represented as a 11236 * nonnegative integer (for example, a fractional value), then -2 11237 * is returned. 11238 * 11239 * <p><b>Note:</b> This method cannot handle <a 11240 * href="#supplementary"> supplementary characters</a>. To support 11241 * all Unicode characters, including supplementary characters, use 11242 * the {@link #getNumericValue(int)} method. 11243 * 11244 * @param ch the character to be converted. 11245 * @return the numeric value of the character, as a nonnegative {@code int} 11246 * value; -2 if the character has a numeric value but the value 11247 * can not be represented as a nonnegative {@code int} value; 11248 * -1 if the character has no numeric value. 11249 * @see Character#forDigit(int, int) 11250 * @see Character#isDigit(char) 11251 * @since 1.1 11252 */ 11253 public static int getNumericValue(char ch) { 11254 return getNumericValue((int)ch); 11255 } 11256 11257 /** 11258 * Returns the {@code int} value that the specified 11259 * character (Unicode code point) represents. For example, the character 11260 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 11261 * an {@code int} with a value of 50. 11262 * <p> 11263 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11264 * {@code '\u005Cu005A'}), lowercase 11265 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11266 * full width variant ({@code '\u005CuFF21'} through 11267 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11268 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11269 * through 35. This is independent of the Unicode specification, 11270 * which does not assign numeric values to these {@code char} 11271 * values. 
11272 * <p> 11273 * If the character does not have a numeric value, then -1 is returned. 11274 * If the character has a numeric value that cannot be represented as a 11275 * nonnegative integer (for example, a fractional value), then -2 11276 * is returned. 11277 * 11278 * @param codePoint the character (Unicode code point) to be converted. 11279 * @return the numeric value of the character, as a nonnegative {@code int} 11280 * value; -2 if the character has a numeric value but the value 11281 * can not be represented as a nonnegative {@code int} value; 11282 * -1 if the character has no numeric value. 11283 * @see Character#forDigit(int, int) 11284 * @see Character#isDigit(int) 11285 * @since 1.5 11286 */ 11287 public static int getNumericValue(int codePoint) { 11288 return CharacterData.of(codePoint).getNumericValue(codePoint); 11289 } 11290 11291 /** 11292 * Determines if the specified character is ISO-LATIN-1 white space. 11293 * This method returns {@code true} for the following five 11294 * characters only: 11295 * <table class="striped"> 11296 * <caption style="display:none">truechars</caption> 11297 * <thead> 11298 * <tr><th scope="col">Character 11299 * <th scope="col">Code 11300 * <th scope="col">Name 11301 * </thead> 11302 * <tbody> 11303 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td> 11304 * <td>{@code HORIZONTAL TABULATION}</td></tr> 11305 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td> 11306 * <td>{@code NEW LINE}</td></tr> 11307 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td> 11308 * <td>{@code FORM FEED}</td></tr> 11309 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td> 11310 * <td>{@code CARRIAGE RETURN}</td></tr> 11311 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td> 11312 * <td>{@code SPACE}</td></tr> 11313 * </tbody> 11314 * </table> 11315 * 11316 * @param ch the character to be tested. 
11317 * @return {@code true} if the character is ISO-LATIN-1 white 11318 * space; {@code false} otherwise. 11319 * @see Character#isSpaceChar(char) 11320 * @see Character#isWhitespace(char) 11321 * @deprecated Replaced by isWhitespace(char). 11322 */ 11323 @Deprecated(since="1.1") 11324 public static boolean isSpace(char ch) { 11325 return (ch <= 0x0020) && 11326 (((((1L << 0x0009) | 11327 (1L << 0x000A) | 11328 (1L << 0x000C) | 11329 (1L << 0x000D) | 11330 (1L << 0x0020)) >> ch) & 1L) != 0); 11331 } 11332 11333 11334 /** 11335 * Determines if the specified character is a Unicode space character. 11336 * A character is considered to be a space character if and only if 11337 * it is specified to be a space character by the Unicode Standard. This 11338 * method returns true if the character's general category type is any of 11339 * the following: 11340 * <ul> 11341 * <li> {@code SPACE_SEPARATOR} 11342 * <li> {@code LINE_SEPARATOR} 11343 * <li> {@code PARAGRAPH_SEPARATOR} 11344 * </ul> 11345 * 11346 * <p><b>Note:</b> This method cannot handle <a 11347 * href="#supplementary"> supplementary characters</a>. To support 11348 * all Unicode characters, including supplementary characters, use 11349 * the {@link #isSpaceChar(int)} method. 11350 * 11351 * @param ch the character to be tested. 11352 * @return {@code true} if the character is a space character; 11353 * {@code false} otherwise. 11354 * @see Character#isWhitespace(char) 11355 * @since 1.1 11356 */ 11357 public static boolean isSpaceChar(char ch) { 11358 return isSpaceChar((int)ch); 11359 } 11360 11361 /** 11362 * Determines if the specified character (Unicode code point) is a 11363 * Unicode space character. A character is considered to be a 11364 * space character if and only if it is specified to be a space 11365 * character by the Unicode Standard. 
This method returns true if 11366 * the character's general category type is any of the following: 11367 * 11368 * <ul> 11369 * <li> {@link #SPACE_SEPARATOR} 11370 * <li> {@link #LINE_SEPARATOR} 11371 * <li> {@link #PARAGRAPH_SEPARATOR} 11372 * </ul> 11373 * 11374 * @param codePoint the character (Unicode code point) to be tested. 11375 * @return {@code true} if the character is a space character; 11376 * {@code false} otherwise. 11377 * @see Character#isWhitespace(int) 11378 * @since 1.5 11379 */ 11380 public static boolean isSpaceChar(int codePoint) { 11381 return ((((1 << Character.SPACE_SEPARATOR) | 11382 (1 << Character.LINE_SEPARATOR) | 11383 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 11384 != 0; 11385 } 11386 11387 /** 11388 * Determines if the specified character is white space according to Java. 11389 * A character is a Java whitespace character if and only if it satisfies 11390 * one of the following criteria: 11391 * <ul> 11392 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 11393 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 11394 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11395 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11396 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11397 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11398 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11399 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11400 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11401 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11402 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11403 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11404 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11405 * </ul> 11406 * 11407 * <p><b>Note:</b> This method cannot handle <a 11408 * href="#supplementary"> supplementary characters</a>. 
To support 11409 * all Unicode characters, including supplementary characters, use 11410 * the {@link #isWhitespace(int)} method. 11411 * 11412 * @param ch the character to be tested. 11413 * @return {@code true} if the character is a Java whitespace 11414 * character; {@code false} otherwise. 11415 * @see Character#isSpaceChar(char) 11416 * @since 1.1 11417 */ 11418 public static boolean isWhitespace(char ch) { 11419 return isWhitespace((int)ch); 11420 } 11421 11422 /** 11423 * Determines if the specified character (Unicode code point) is 11424 * white space according to Java. A character is a Java 11425 * whitespace character if and only if it satisfies one of the 11426 * following criteria: 11427 * <ul> 11428 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 11429 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 11430 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11431 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11432 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11433 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11434 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11435 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11436 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11437 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11438 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11439 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11440 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11441 * </ul> 11442 * 11443 * @param codePoint the character (Unicode code point) to be tested. 11444 * @return {@code true} if the character is a Java whitespace 11445 * character; {@code false} otherwise. 
11446 * @see Character#isSpaceChar(int) 11447 * @since 1.5 11448 */ 11449 public static boolean isWhitespace(int codePoint) { 11450 return CharacterData.of(codePoint).isWhitespace(codePoint); 11451 } 11452 11453 /** 11454 * Determines if the specified character is an ISO control 11455 * character. A character is considered to be an ISO control 11456 * character if its code is in the range {@code '\u005Cu0000'} 11457 * through {@code '\u005Cu001F'} or in the range 11458 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11459 * 11460 * <p><b>Note:</b> This method cannot handle <a 11461 * href="#supplementary"> supplementary characters</a>. To support 11462 * all Unicode characters, including supplementary characters, use 11463 * the {@link #isISOControl(int)} method. 11464 * 11465 * @param ch the character to be tested. 11466 * @return {@code true} if the character is an ISO control character; 11467 * {@code false} otherwise. 11468 * 11469 * @see Character#isSpaceChar(char) 11470 * @see Character#isWhitespace(char) 11471 * @since 1.1 11472 */ 11473 public static boolean isISOControl(char ch) { 11474 return isISOControl((int)ch); 11475 } 11476 11477 /** 11478 * Determines if the referenced character (Unicode code point) is an ISO control 11479 * character. A character is considered to be an ISO control 11480 * character if its code is in the range {@code '\u005Cu0000'} 11481 * through {@code '\u005Cu001F'} or in the range 11482 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11483 * 11484 * @param codePoint the character (Unicode code point) to be tested. 11485 * @return {@code true} if the character is an ISO control character; 11486 * {@code false} otherwise. 
11487 * @see Character#isSpaceChar(int) 11488 * @see Character#isWhitespace(int) 11489 * @since 1.5 11490 */ 11491 public static boolean isISOControl(int codePoint) { 11492 // Optimized form of: 11493 // (codePoint >= 0x00 && codePoint <= 0x1F) || 11494 // (codePoint >= 0x7F && codePoint <= 0x9F); 11495 return codePoint <= 0x9F && 11496 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 11497 } 11498 11499 /** 11500 * Returns a value indicating a character's general category. 11501 * 11502 * <p><b>Note:</b> This method cannot handle <a 11503 * href="#supplementary"> supplementary characters</a>. To support 11504 * all Unicode characters, including supplementary characters, use 11505 * the {@link #getType(int)} method. 11506 * 11507 * @param ch the character to be tested. 11508 * @return a value of type {@code int} representing the 11509 * character's general category. 11510 * @see Character#COMBINING_SPACING_MARK 11511 * @see Character#CONNECTOR_PUNCTUATION 11512 * @see Character#CONTROL 11513 * @see Character#CURRENCY_SYMBOL 11514 * @see Character#DASH_PUNCTUATION 11515 * @see Character#DECIMAL_DIGIT_NUMBER 11516 * @see Character#ENCLOSING_MARK 11517 * @see Character#END_PUNCTUATION 11518 * @see Character#FINAL_QUOTE_PUNCTUATION 11519 * @see Character#FORMAT 11520 * @see Character#INITIAL_QUOTE_PUNCTUATION 11521 * @see Character#LETTER_NUMBER 11522 * @see Character#LINE_SEPARATOR 11523 * @see Character#LOWERCASE_LETTER 11524 * @see Character#MATH_SYMBOL 11525 * @see Character#MODIFIER_LETTER 11526 * @see Character#MODIFIER_SYMBOL 11527 * @see Character#NON_SPACING_MARK 11528 * @see Character#OTHER_LETTER 11529 * @see Character#OTHER_NUMBER 11530 * @see Character#OTHER_PUNCTUATION 11531 * @see Character#OTHER_SYMBOL 11532 * @see Character#PARAGRAPH_SEPARATOR 11533 * @see Character#PRIVATE_USE 11534 * @see Character#SPACE_SEPARATOR 11535 * @see Character#START_PUNCTUATION 11536 * @see Character#SURROGATE 11537 * @see Character#TITLECASE_LETTER 11538 * @see 
Character#UNASSIGNED 11539 * @see Character#UPPERCASE_LETTER 11540 * @since 1.1 11541 */ 11542 public static int getType(char ch) { 11543 return getType((int)ch); 11544 } 11545 11546 /** 11547 * Returns a value indicating a character's general category. 11548 * 11549 * @param codePoint the character (Unicode code point) to be tested. 11550 * @return a value of type {@code int} representing the 11551 * character's general category. 11552 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 11553 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 11554 * @see Character#CONTROL CONTROL 11555 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 11556 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 11557 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 11558 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 11559 * @see Character#END_PUNCTUATION END_PUNCTUATION 11560 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 11561 * @see Character#FORMAT FORMAT 11562 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 11563 * @see Character#LETTER_NUMBER LETTER_NUMBER 11564 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 11565 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 11566 * @see Character#MATH_SYMBOL MATH_SYMBOL 11567 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 11568 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 11569 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 11570 * @see Character#OTHER_LETTER OTHER_LETTER 11571 * @see Character#OTHER_NUMBER OTHER_NUMBER 11572 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 11573 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 11574 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 11575 * @see Character#PRIVATE_USE PRIVATE_USE 11576 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 11577 * @see Character#START_PUNCTUATION START_PUNCTUATION 11578 * @see Character#SURROGATE SURROGATE 11579 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 11580 * @see 
Character#UNASSIGNED UNASSIGNED 11581 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 11582 * @since 1.5 11583 */ 11584 public static int getType(int codePoint) { 11585 return CharacterData.of(codePoint).getType(codePoint); 11586 } 11587 11588 /** 11589 * Determines the character representation for a specific digit in 11590 * the specified radix. If the value of {@code radix} is not a 11591 * valid radix, or the value of {@code digit} is not a valid 11592 * digit in the specified radix, the null character 11593 * ({@code '\u005Cu0000'}) is returned. 11594 * <p> 11595 * The {@code radix} argument is valid if it is greater than or 11596 * equal to {@code MIN_RADIX} and less than or equal to 11597 * {@code MAX_RADIX}. The {@code digit} argument is valid if 11598 * {@code 0 <= digit < radix}. 11599 * <p> 11600 * If the digit is less than 10, then 11601 * {@code '0' + digit} is returned. Otherwise, the value 11602 * {@code 'a' + digit - 10} is returned. 11603 * 11604 * @param digit the number to convert to a character. 11605 * @param radix the radix. 11606 * @return the {@code char} representation of the specified digit 11607 * in the specified radix. 11608 * @see Character#MIN_RADIX 11609 * @see Character#MAX_RADIX 11610 * @see Character#digit(char, int) 11611 */ 11612 public static char forDigit(int digit, int radix) { 11613 if ((digit >= radix) || (digit < 0)) { 11614 return '\0'; 11615 } 11616 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 11617 return '\0'; 11618 } 11619 if (digit < 10) { 11620 return (char)('0' + digit); 11621 } 11622 return (char)('a' - 10 + digit); 11623 } 11624 11625 /** 11626 * Returns the Unicode directionality property for the given 11627 * character. Character directionality is used to calculate the 11628 * visual ordering of text. The directionality value of undefined 11629 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 
11630 * 11631 * <p><b>Note:</b> This method cannot handle <a 11632 * href="#supplementary"> supplementary characters</a>. To support 11633 * all Unicode characters, including supplementary characters, use 11634 * the {@link #getDirectionality(int)} method. 11635 * 11636 * @param ch {@code char} for which the directionality property 11637 * is requested. 11638 * @return the directionality property of the {@code char} value. 11639 * 11640 * @see Character#DIRECTIONALITY_UNDEFINED 11641 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 11642 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 11643 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11644 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 11645 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11646 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11647 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 11648 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11649 * @see Character#DIRECTIONALITY_NONSPACING_MARK 11650 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 11651 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 11652 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 11653 * @see Character#DIRECTIONALITY_WHITESPACE 11654 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 11655 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11656 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11657 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11658 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11659 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11660 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11661 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11662 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE 11663 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11664 * @since 1.4 11665 */ 11666 public static byte getDirectionality(char ch) { 11667 return getDirectionality((int)ch); 11668 } 11669 11670 /** 11671 * Returns the Unicode 
directionality property for the given 11672 * character (Unicode code point). Character directionality is 11673 * used to calculate the visual ordering of text. The 11674 * directionality value of undefined character is {@link 11675 * #DIRECTIONALITY_UNDEFINED}. 11676 * 11677 * @param codePoint the character (Unicode code point) for which 11678 * the directionality property is requested. 11679 * @return the directionality property of the character. 11680 * 11681 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 11682 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 11683 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 11684 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11685 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 11686 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11687 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11688 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 11689 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11690 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 11691 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 11692 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 11693 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 11694 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 11695 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 11696 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11697 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11698 * @see 
Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11699 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11700 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11701 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11702 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11703 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE 11704 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11705 * @since 1.5 11706 */ 11707 public static byte getDirectionality(int codePoint) { 11708 return CharacterData.of(codePoint).getDirectionality(codePoint); 11709 } 11710 11711 /** 11712 * Determines whether the character is mirrored according to the 11713 * Unicode specification. Mirrored characters should have their 11714 * glyphs horizontally mirrored when displayed in text that is 11715 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 11716 * PARENTHESIS is semantically defined to be an <i>opening 11717 * parenthesis</i>. This will appear as a "(" in text that is 11718 * left-to-right but as a ")" in text that is right-to-left. 11719 * 11720 * <p><b>Note:</b> This method cannot handle <a 11721 * href="#supplementary"> supplementary characters</a>. To support 11722 * all Unicode characters, including supplementary characters, use 11723 * the {@link #isMirrored(int)} method. 11724 * 11725 * @param ch {@code char} for which the mirrored property is requested 11726 * @return {@code true} if the char is mirrored, {@code false} 11727 * if the {@code char} is not mirrored or is not defined. 
11728 * @since 1.4 11729 */ 11730 public static boolean isMirrored(char ch) { 11731 return isMirrored((int)ch); 11732 } 11733 11734 /** 11735 * Determines whether the specified character (Unicode code point) 11736 * is mirrored according to the Unicode specification. Mirrored 11737 * characters should have their glyphs horizontally mirrored when 11738 * displayed in text that is right-to-left. For example, 11739 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 11740 * defined to be an <i>opening parenthesis</i>. This will appear 11741 * as a "(" in text that is left-to-right but as a ")" in text 11742 * that is right-to-left. 11743 * 11744 * @param codePoint the character (Unicode code point) to be tested. 11745 * @return {@code true} if the character is mirrored, {@code false} 11746 * if the character is not mirrored or is not defined. 11747 * @since 1.5 11748 */ 11749 public static boolean isMirrored(int codePoint) { 11750 return CharacterData.of(codePoint).isMirrored(codePoint); 11751 } 11752 11753 /** 11754 * Compares two {@code Character} objects numerically. 11755 * 11756 * @param anotherCharacter the {@code Character} to be compared. 11757 * @return the value {@code 0} if the argument {@code Character} 11758 * is equal to this {@code Character}; a value less than 11759 * {@code 0} if this {@code Character} is numerically less 11760 * than the {@code Character} argument; and a value greater than 11761 * {@code 0} if this {@code Character} is numerically greater 11762 * than the {@code Character} argument (unsigned comparison). 11763 * Note that this is strictly a numerical comparison; it is not 11764 * locale-dependent. 11765 * @since 1.2 11766 */ 11767 public int compareTo(Character anotherCharacter) { 11768 return compare(this.value, anotherCharacter.value); 11769 } 11770 11771 /** 11772 * Compares two {@code char} values numerically. 
11773 * The value returned is identical to what would be returned by: 11774 * <pre> 11775 * Character.valueOf(x).compareTo(Character.valueOf(y)) 11776 * </pre> 11777 * 11778 * @param x the first {@code char} to compare 11779 * @param y the second {@code char} to compare 11780 * @return the value {@code 0} if {@code x == y}; 11781 * a value less than {@code 0} if {@code x < y}; and 11782 * a value greater than {@code 0} if {@code x > y} 11783 * @since 1.7 11784 */ 11785 public static int compare(char x, char y) { 11786 return x - y; 11787 } 11788 11789 /** 11790 * Converts the character (Unicode code point) argument to uppercase using 11791 * information from the UnicodeData file. 11792 * 11793 * @param codePoint the character (Unicode code point) to be converted. 11794 * @return either the uppercase equivalent of the character, if 11795 * any, or an error flag ({@code Character.ERROR}) 11796 * that indicates that a 1:M {@code char} mapping exists. 11797 * @see Character#isLowerCase(char) 11798 * @see Character#isUpperCase(char) 11799 * @see Character#toLowerCase(char) 11800 * @see Character#toTitleCase(char) 11801 * @since 1.4 11802 */ 11803 static int toUpperCaseEx(int codePoint) { 11804 assert isValidCodePoint(codePoint); 11805 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 11806 } 11807 11808 /** 11809 * Converts the character (Unicode code point) argument to uppercase using case 11810 * mapping information from the SpecialCasing file in the Unicode 11811 * specification. If a character has no explicit uppercase 11812 * mapping, then the {@code char} itself is returned in the 11813 * {@code char[]}. 11814 * 11815 * @param codePoint the character (Unicode code point) to be converted. 11816 * @return a {@code char[]} with the uppercased character. 11817 * @since 1.4 11818 */ 11819 static char[] toUpperCaseCharArray(int codePoint) { 11820 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 
11821 assert isBmpCodePoint(codePoint); 11822 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 11823 } 11824 11825 /** 11826 * The number of bits used to represent a {@code char} value in unsigned 11827 * binary form, constant {@code 16}. 11828 * 11829 * @since 1.5 11830 */ 11831 public static final int SIZE = 16; 11832 11833 /** 11834 * The number of bytes used to represent a {@code char} value in unsigned 11835 * binary form. 11836 * 11837 * @since 1.8 11838 */ 11839 public static final int BYTES = SIZE / Byte.SIZE; 11840 11841 /** 11842 * Returns the value obtained by reversing the order of the bytes in the 11843 * specified {@code char} value. 11844 * 11845 * @param ch The {@code char} of which to reverse the byte order. 11846 * @return the value obtained by reversing (or, equivalently, swapping) 11847 * the bytes in the specified {@code char} value. 11848 * @since 1.5 11849 */ 11850 @IntrinsicCandidate 11851 public static char reverseBytes(char ch) { 11852 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 11853 } 11854 11855 /** 11856 * Returns the name of the specified character 11857 * {@code codePoint}, or null if the code point is 11858 * {@link #UNASSIGNED unassigned}. 11859 * <p> 11860 * If the specified character is not assigned a name by 11861 * the <i>UnicodeData</i> file (part of the Unicode Character 11862 * Database maintained by the Unicode Consortium), the returned 11863 * name is the same as the result of the expression: 11864 * 11865 * <blockquote>{@code 11866 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 11867 * + " " 11868 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11869 * 11870 * }</blockquote> 11871 * 11872 * For the {@code codePoint}s in the <i>UnicodeData</i> file, the name 11873 * returned by this method follows the naming scheme in the 11874 * "Unicode Name Property" section of the Unicode Standard. 
For other 11875 * code points, such as Hangul/Ideographs, The name generation rule above 11876 * differs from the one defined in the Unicode Standard. 11877 * 11878 * @param codePoint the character (Unicode code point) 11879 * 11880 * @return the name of the specified character, or null if 11881 * the code point is unassigned. 11882 * 11883 * @throws IllegalArgumentException if the specified 11884 * {@code codePoint} is not a valid Unicode 11885 * code point. 11886 * 11887 * @since 1.7 11888 */ 11889 public static String getName(int codePoint) { 11890 if (!isValidCodePoint(codePoint)) { 11891 throw new IllegalArgumentException( 11892 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 11893 } 11894 String name = CharacterName.getInstance().getName(codePoint); 11895 if (name != null) 11896 return name; 11897 if (getType(codePoint) == UNASSIGNED) 11898 return null; 11899 UnicodeBlock block = UnicodeBlock.of(codePoint); 11900 if (block != null) 11901 return block.toString().replace('_', ' ') + " " 11902 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11903 // should never come here 11904 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11905 } 11906 11907 /** 11908 * Returns the code point value of the Unicode character specified by 11909 * the given character name. 11910 * <p> 11911 * If a character is not assigned a name by the <i>UnicodeData</i> 11912 * file (part of the Unicode Character Database maintained by the Unicode 11913 * Consortium), its name is defined as the result of the expression: 11914 * 11915 * <blockquote>{@code 11916 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 11917 * + " " 11918 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11919 * 11920 * }</blockquote> 11921 * <p> 11922 * The {@code name} matching is case insensitive, with any leading and 11923 * trailing whitespace character removed. 
11924 * 11925 * For the code points in the <i>UnicodeData</i> file, this method 11926 * recognizes the name which conforms to the name defined in the 11927 * "Unicode Name Property" section in the Unicode Standard. For other 11928 * code points, this method recognizes the name generated with 11929 * {@link #getName(int)} method. 11930 * 11931 * @param name the character name 11932 * 11933 * @return the code point value of the character specified by its name. 11934 * 11935 * @throws IllegalArgumentException if the specified {@code name} 11936 * is not a valid character name. 11937 * @throws NullPointerException if {@code name} is {@code null} 11938 * 11939 * @since 9 11940 */ 11941 public static int codePointOf(String name) { 11942 name = name.trim().toUpperCase(Locale.ROOT); 11943 int cp = CharacterName.getInstance().getCodePoint(name); 11944 if (cp != -1) 11945 return cp; 11946 try { 11947 int off = name.lastIndexOf(' '); 11948 if (off != -1) { 11949 cp = Integer.parseInt(name, off + 1, name.length(), 16); 11950 if (isValidCodePoint(cp) && name.equals(getName(cp))) 11951 return cp; 11952 } 11953 } catch (Exception x) {} 11954 throw new IllegalArgumentException("Unrecognized character name :" + name); 11955 } 11956 }